[Mesa-dev] [PATCH] r300/compiler: Implement ROUND
Tom Stellard
tstellar at gmail.com
Tue Aug 23 14:41:45 PDT 2011
According to the GLSL spec, the implementor can decide which way to round
when the fraction is .5. The r300 compiler will round down (pick the floor)
in that case, so we can use CND and save an instruction.
---
MLAA should work on r300g (r500 only) with this patch. I've tested
with the kasanen-post-process-v2 branch and it looks OK to me, but it
would be nice to have a second opinion.
I was testing with: pp_jimenezmlaa=8 glxgears
src/gallium/drivers/r300/compiler/radeon_opcodes.c | 7 +++
src/gallium/drivers/r300/compiler/radeon_opcodes.h | 3 +
.../drivers/r300/compiler/radeon_program_alu.c | 54 ++++++++++++++++++++
src/gallium/drivers/r300/r300_tgsi_to_rc.c | 2 +-
4 files changed, 65 insertions(+), 1 deletions(-)
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
index afd78ad..527db9a 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
@@ -246,6 +246,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsStandardScalar = 1
},
{
+ .Opcode = RC_OPCODE_ROUND,
+ .Name = "ROUND",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_RSQ,
.Name = "RSQ",
.NumSrcRegs = 1,
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
index b586882..968dc7b 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
@@ -133,6 +133,9 @@ typedef enum {
/** scalar instruction: dst = 1 / src0.x */
RC_OPCODE_RCP,
+ /** vec4 instruction: dst.c = frc(src0.c) > 0.5 ? ceil(src0.c) : floor(src0.c) */
+ RC_OPCODE_ROUND,
+
/** scalar instruction: dst = 1 / sqrt(src0.x) */
RC_OPCODE_RSQ,
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
index e273bc4..0bfd2dc 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -104,6 +104,13 @@ static const struct rc_src_register builtin_one = {
.Index = 0,
.Swizzle = RC_SWIZZLE_1111
};
+
+static const struct rc_src_register builtin_half = { /* NOTE(review): not referenced anywhere in this patch */
+ .File = RC_FILE_NONE,
+ .Index = 0,
+ .Swizzle = RC_SWIZZLE_HHHH /* presumably reads the constant 0.5 in every channel -- TODO confirm */
+};
+
static const struct rc_src_register srcreg_undefined = {
.File = RC_FILE_NONE,
.Index = 0,
@@ -416,6 +423,52 @@ static void transform_POW(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+/* Lower dst = ROUND(src) to the sequence below, emitted in place of inst:
+ * frac = FRC(src)
+ * low = src - frac
+ * high = low + 1
+ * dst = CND high, low, frac (high where frac > 0.5, low otherwise)
+ *
+ * According to the GLSL spec, the implementor can decide which way to round
+ * when the fraction is .5. In this case we round down, so we can use
+ * CND and save an instruction.
+ *
+ * The optimizer should reduce this sequence to 3 instructions using
+ * presubtract.
+ */
+static void transform_ROUND(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ unsigned int mask = inst->U.I.DstReg.WriteMask; /* temps use dst's channels */
+ unsigned int frac_index, low_index, high_index;
+ struct rc_dst_register frac_dst, low_dst, high_dst;
+ struct rc_src_register frac_src, low_src, high_src;
+
+ /* frac = FRC(src) */
+ frac_index = rc_find_free_temporary(c);
+ frac_dst = dstregtmpmask(frac_index, mask);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, inst->U.I.SrcReg[0]);
+ frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index);
+
+ /* low = src - frac: negate frac, not src, else low = frac - src */
+ low_index = rc_find_free_temporary(c);
+ low_dst = dstregtmpmask(low_index, mask);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, low_dst,
+ inst->U.I.SrcReg[0], negate(frac_src));
+ low_src = srcreg(RC_FILE_TEMPORARY, low_dst.Index);
+
+ /* high = low + 1 */
+ high_index = rc_find_free_temporary(c);
+ high_dst = dstregtmpmask(high_index, mask);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, high_dst, low_src, builtin_one);
+ high_src = srcreg(RC_FILE_TEMPORARY, high_dst.Index);
+
+ /* dst = CND high, low, frac; every new op sits before the removed inst */
+ emit3(c, inst->Prev, RC_OPCODE_CND, 0, inst->U.I.DstReg,
+ high_src, low_src, frac_src);
+ rc_remove_instruction(inst);
+}
+
static void transform_RSQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -599,6 +652,7 @@ int radeonTransformALU(
case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
case RC_OPCODE_POW: transform_POW(c, inst); return 1;
+ case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1;
case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 07a3f3c..4cb08b5 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode)
case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP;
case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
- /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */
+ case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND;
case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
case TGSI_OPCODE_POW: return RC_OPCODE_POW;
--
1.7.3.4
More information about the mesa-dev
mailing list