[Mesa-dev] [PATCH] r300/compiler: Implement ROUND
Tom Stellard
tstellar at gmail.com
Wed Aug 24 06:37:40 PDT 2011
According to the GLSL spec, the implementor can decide which way to round
when the fraction is .5. The r300 compiler lowers ROUND to floor(x + 0.5), so a fraction of exactly .5 is rounded up (toward +infinity).
---
v2: Slightly better implementation that uses one fewer instruction
in the best case.
src/gallium/drivers/r300/compiler/radeon_opcodes.c | 7 +++
src/gallium/drivers/r300/compiler/radeon_opcodes.h | 3 +
.../drivers/r300/compiler/radeon_program_alu.c | 45 ++++++++++++++++++++
src/gallium/drivers/r300/r300_tgsi_to_rc.c | 2 +-
4 files changed, 56 insertions(+), 1 deletions(-)
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
index afd78ad..527db9a 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
@@ -246,6 +246,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsStandardScalar = 1
},
{
+ .Opcode = RC_OPCODE_ROUND,
+ .Name = "ROUND",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_RSQ,
.Name = "RSQ",
.NumSrcRegs = 1,
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
index b586882..0b881c2 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
@@ -133,6 +133,9 @@ typedef enum {
/** scalar instruction: dst = 1 / src0.x */
RC_OPCODE_RCP,
+ /** vec4 instruction: dst.c = floor(src0.c + 0.5) */
+ RC_OPCODE_ROUND,
+
/** scalar instruction: dst = 1 / sqrt(src0.x) */
RC_OPCODE_RSQ,
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
index e273bc4..dd1dfb3 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -104,6 +104,13 @@ static const struct rc_src_register builtin_one = {
.Index = 0,
.Swizzle = RC_SWIZZLE_1111
};
+
+static const struct rc_src_register builtin_half = {
+ .File = RC_FILE_NONE,
+ .Index = 0,
+ .Swizzle = RC_SWIZZLE_HHHH /* presumably 0.5 in all four channels; used as the .5 addend by transform_ROUND -- TODO confirm */
+};
+
static const struct rc_src_register srcreg_undefined = {
.File = RC_FILE_NONE,
.Index = 0,
@@ -416,6 +423,43 @@ static void transform_POW(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+/* Lower dst = ROUND(src) to ADD/FRC/ADD, i.e. dst = floor(src + .5):
+ * add = src + .5
+ * frac = FRC(add)
+ * dst = add - frac
+ *
+ * According to the GLSL spec, the implementor can decide which way to round
+ * when the fraction is .5. Because this computes floor(src + .5), a fraction
+ * of exactly .5 rounds up (toward +infinity): 0.5 -> 1, -0.5 -> 0.
+ * Uses two new temporaries; the original ROUND instruction is removed. */
+static void transform_ROUND(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ unsigned int mask = inst->U.I.DstReg.WriteMask;
+ unsigned int frac_index, add_index;
+ struct rc_dst_register frac_dst, add_dst;
+ struct rc_src_register frac_src, add_src;
+
+ /* add = src + .5 (written only to the channels in the original write mask) */
+ add_index = rc_find_free_temporary(c);
+ add_dst = dstregtmpmask(add_index, mask);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0],
+ builtin_half);
+ add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index);
+
+
+ /* frac = FRC(add), stored in a second temporary with the same write mask */
+ frac_index = rc_find_free_temporary(c);
+ frac_dst = dstregtmpmask(frac_index, mask);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src);
+ frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index);
+
+ /* dst = add - frac, written to the original destination register */
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg,
+ add_src, negate(frac_src));
+ rc_remove_instruction(inst);
+}
+
static void transform_RSQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -599,6 +643,7 @@ int radeonTransformALU(
case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
case RC_OPCODE_POW: transform_POW(c, inst); return 1;
+ case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1;
case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 07a3f3c..4cb08b5 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode)
case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP;
case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
- /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */
+ case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND;
case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
case TGSI_OPCODE_POW: return RC_OPCODE_POW;
--
1.7.3.4
More information about the mesa-dev
mailing list