Mesa (master): r300/compiler: implement DP2 opcode

Marek Olšák mareko at kemper.freedesktop.org
Sun Aug 15 22:08:43 UTC 2010


Module: Mesa
Branch: master
Commit: 27eb2e275544d78a229eaded9bafc0db60172675
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=27eb2e275544d78a229eaded9bafc0db60172675

Author: Marek Olšák <maraeo at gmail.com>
Date:   Sun Aug 15 23:44:16 2010 +0200

r300/compiler: implement DP2 opcode

---

 src/gallium/drivers/r300/r300_tgsi_to_rc.c         |    2 +-
 .../drivers/dri/r300/compiler/radeon_opcodes.c     |   10 +++++
 .../drivers/dri/r300/compiler/radeon_opcodes.h     |    3 +
 .../drivers/dri/r300/compiler/radeon_program_alu.c |   43 +++++++++++++++----
 4 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 53f07c1..a4911b9 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -103,7 +103,7 @@ static unsigned translate_opcode(unsigned opcode)
         case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
      /* case TGSI_OPCODE_NRM: return RC_OPCODE_NRM; */
      /* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
-     /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */
+        case TGSI_OPCODE_DP2: return RC_OPCODE_DP2;
         case TGSI_OPCODE_TXL: return RC_OPCODE_TXL;
         case TGSI_OPCODE_BRK: return RC_OPCODE_BRK;
         case TGSI_OPCODE_IF: return RC_OPCODE_IF;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index 0602e02..da495a3 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -95,6 +95,12 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
 		.IsComponentwise = 1
 	},
 	{
+		.Opcode = RC_OPCODE_DP2,
+		.Name = "DP2",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
 		.Opcode = RC_OPCODE_DP3,
 		.Name = "DP3",
 		.NumSrcRegs = 2,
@@ -442,6 +448,10 @@ void rc_compute_sources_for_writemask(
 		case RC_OPCODE_ARL:
 			srcmasks[0] |= RC_MASK_X;
 			break;
+		case RC_OPCODE_DP2:
+			srcmasks[0] |= RC_MASK_XY;
+			srcmasks[1] |= RC_MASK_XY;
+			break;
 		case RC_OPCODE_DP3:
 			srcmasks[0] |= RC_MASK_XYZ;
 			srcmasks[1] |= RC_MASK_XYZ;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 0a9fcee..d3f639c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -64,6 +64,9 @@ typedef enum {
 	 * dst.c = d src0.c / dy */
 	RC_OPCODE_DDY,
 
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
+	RC_OPCODE_DP2,
+
 	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
 	RC_OPCODE_DP3,
 
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index c988e0c..704a7bb 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -216,18 +216,18 @@ static void transform_CEIL(struct radeon_compiler* c,
 	rc_remove_instruction(inst);
 }
 
-static void transform_DP3(struct radeon_compiler* c,
+static void transform_DP2(struct radeon_compiler* c,
 	struct rc_instruction* inst)
 {
 	struct rc_src_register src0 = inst->U.I.SrcReg[0];
 	struct rc_src_register src1 = inst->U.I.SrcReg[1];
-	src0.Negate &= ~RC_MASK_W;
-	src0.Swizzle &= ~(7 << (3 * 3));
-	src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
-	src1.Negate &= ~RC_MASK_W;
-	src1.Swizzle &= ~(7 << (3 * 3));
-	src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
-	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+	src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+	src0.Swizzle &= ~(63 << (3 * 2));
+	src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+	src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+	src1.Swizzle &= ~(63 << (3 * 2));
+	src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+	emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
 	rc_remove_instruction(inst);
 }
 
@@ -553,6 +553,7 @@ int radeonTransformALU(
 	switch(inst->U.I.Opcode) {
 	case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
 	case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+	case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
 	case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
 	case RC_OPCODE_DST: transform_DST(c, inst); return 1;
 	case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
@@ -615,6 +616,29 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
 	rc_remove_instruction(inst);
 }
 
+static void transform_r300_vertex_DP2(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_instruction *next_inst = inst->Next;
+	transform_DP2(c, inst);
+	next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
+}
+
+static void transform_r300_vertex_DP3(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src0 = inst->U.I.SrcReg[0];
+	struct rc_src_register src1 = inst->U.I.SrcReg[1];
+	src0.Negate &= ~RC_MASK_W;
+	src0.Swizzle &= ~(7 << (3 * 3));
+	src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+	src1.Negate &= ~RC_MASK_W;
+	src1.Swizzle &= ~(7 << (3 * 3));
+	src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+	rc_remove_instruction(inst);
+}
+
 static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
 	struct rc_instruction* inst)
 {
@@ -758,7 +782,8 @@ int r300_transform_vertex_alu(
 	case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
 	case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
 	case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
-	case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
+	case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
+	case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
 	case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
 	case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
 	case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;




More information about the mesa-commit mailing list