[Mesa-dev] [PATCH] r300/compiler: Implement ROUND

Tom Stellard tstellar at gmail.com
Wed Aug 24 06:37:40 PDT 2011


According to the GLSL spec, the implementor can decide which way to round
when the fraction is .5.  The r300 compiler computes floor(x + .5), so a
fraction of exactly .5 rounds up (toward positive infinity).
---

v2: Slightly better implementation that uses one fewer instruction
in the best case.

 src/gallium/drivers/r300/compiler/radeon_opcodes.c |    7 +++
 src/gallium/drivers/r300/compiler/radeon_opcodes.h |    3 +
 .../drivers/r300/compiler/radeon_program_alu.c     |   45 ++++++++++++++++++++
 src/gallium/drivers/r300/r300_tgsi_to_rc.c         |    2 +-
 4 files changed, 56 insertions(+), 1 deletions(-)

diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
index afd78ad..527db9a 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
@@ -246,6 +246,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
 		.IsStandardScalar = 1
 	},
 	{
+		.Opcode = RC_OPCODE_ROUND,
+		.Name = "ROUND",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
 		.Opcode = RC_OPCODE_RSQ,
 		.Name = "RSQ",
 		.NumSrcRegs = 1,
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
index b586882..0b881c2 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
@@ -133,6 +133,9 @@ typedef enum {
 	/** scalar instruction: dst = 1 / src0.x */
 	RC_OPCODE_RCP,
 
+	/** vec4 instruction: dst.c = floor(src0.c + 0.5) */
+	RC_OPCODE_ROUND,
+
 	/** scalar instruction: dst = 1 / sqrt(src0.x) */
 	RC_OPCODE_RSQ,
 
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
index e273bc4..dd1dfb3 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -104,6 +104,13 @@ static const struct rc_src_register builtin_one = {
 	.Index = 0,
 	.Swizzle = RC_SWIZZLE_1111
 };
+
+static const struct rc_src_register builtin_half = {
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_HHHH /* presumably selects .5 for every channel (TODO confirm); transform_ROUND adds it as the .5 term */
+};
+
 static const struct rc_src_register srcreg_undefined = {
 	.File = RC_FILE_NONE,
 	.Index = 0,
@@ -416,6 +423,43 @@ static void transform_POW(struct radeon_compiler* c,
 	rc_remove_instruction(inst);
 }
 
+/* dst = ROUND(src) :
+ *   add = src + .5
+ *   frac = FRC(add)
+ *   dst = add - frac
+ *
+ * According to the GLSL spec, the implementor can decide which way to round
+ * when the fraction is .5.  Since add - FRC(add) is floor(src + .5), a
+ * fraction of exactly .5 rounds up, toward positive infinity.
+ */
+static void transform_ROUND(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	unsigned int mask = inst->U.I.DstReg.WriteMask;
+	unsigned int frac_index, add_index;
+	struct rc_dst_register frac_dst, add_dst;
+	struct rc_src_register frac_src, add_src;
+
+	/* add = src + .5, stored in a temporary using the original write mask */
+	add_index = rc_find_free_temporary(c);
+	add_dst = dstregtmpmask(add_index, mask);
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0],
+								builtin_half);
+	add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index);
+
+
+	/* frac = FRC(add), stored in a second temporary with the same mask */
+	frac_index = rc_find_free_temporary(c);
+	frac_dst = dstregtmpmask(frac_index, mask);
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src);
+	frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index);
+
+	/* dst = add - frac, written to the original destination; the ROUND
+	 * instruction itself is then removed. */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg,
+						add_src, negate(frac_src));
+	rc_remove_instruction(inst);
+}
+
 static void transform_RSQ(struct radeon_compiler* c,
 	struct rc_instruction* inst)
 {
@@ -599,6 +643,7 @@ int radeonTransformALU(
 	case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
 	case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
 	case RC_OPCODE_POW: transform_POW(c, inst); return 1;
+	case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1;
 	case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
 	case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
 	case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 07a3f3c..4cb08b5 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode)
         case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
         case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP;
         case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
-     /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */
+        case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND;
         case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
         case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
         case TGSI_OPCODE_POW: return RC_OPCODE_POW;
-- 
1.7.3.4



More information about the mesa-dev mailing list