<p></p>
<p>Sending from a mobile, pardon my terseness. ~ C.</p>
<div class="gmail_quote">On Aug 23, 2011 2:42 PM, "Tom Stellard" &lt;<a href="mailto:tstellar@gmail.com">tstellar@gmail.com</a>&gt; wrote:<br type="attribution">> According to the GLSL spec, the implementor can decide which way to round<br>
> when the fraction is .5. The r300 compiler will round down, so we can use<br>> CND and save an instruction.<br>> ---<br>> <br>> MLAA should work on r300g (r500 only) with this patch. I've tested<br>
> with the kasanen-post-process-v2 branch and it looks OK to me, but it<br>> would be nice to have a second opinion.<br>> <br>> I was testing with: pp_jimenezmlaa=8 glxgears<br>> <br>> src/gallium/drivers/r300/compiler/radeon_opcodes.c | 7 +++<br>
> src/gallium/drivers/r300/compiler/radeon_opcodes.h | 3 +<br>> .../drivers/r300/compiler/radeon_program_alu.c | 54 ++++++++++++++++++++<br>> src/gallium/drivers/r300/r300_tgsi_to_rc.c | 2 +-<br>
> 4 files changed, 65 insertions(+), 1 deletions(-)<br>> <br>> diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c<br>> index afd78ad..527db9a 100644<br>
> --- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c<br>> +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c<br>> @@ -246,6 +246,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {<br>>                 .IsStandardScalar = 1<br>
>         },<br>>         {<br>> +                .Opcode = RC_OPCODE_ROUND,<br>> +                .Name = "ROUND",<br>> +                .NumSrcRegs = 1,<br>> +                .HasDstReg = 1,<br>> +                .IsComponentwise = 1<br>> +        },<br>> +        {<br>>                 .Opcode = RC_OPCODE_RSQ,<br>
>                 .Name = "RSQ",<br>>                 .NumSrcRegs = 1,<br>> diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h<br>> index b586882..968dc7b 100644<br>
> --- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h<br>> +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h<br>> @@ -133,6 +133,9 @@ typedef enum {<br>>         /** scalar instruction: dst = 1 / src0.x */<br>
>         RC_OPCODE_RCP,<br>> <br>> +        /** vec4 instruction: dst.c = frc(src0.c) > 0.5 ? ceil(src0.c) : floor(src0.c) */<br>> +        RC_OPCODE_ROUND,<br>> +<br>>         /** scalar instruction: dst = 1 / sqrt(src0.x) */<br>
>         RC_OPCODE_RSQ,<br>> <br>> diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c<br>> index e273bc4..0bfd2dc 100644<br>> --- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c<br>
> +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c<br>> @@ -104,6 +104,13 @@ static const struct rc_src_register builtin_one = {<br>>         .Index = 0,<br>>         .Swizzle = RC_SWIZZLE_1111<br>> };<br>
> +<br>> +static const struct rc_src_register builtin_half = {<br>> +        .File = RC_FILE_NONE,<br>> +        .Index = 0,<br>> +        .Swizzle = RC_SWIZZLE_HHHH<br>> +};<br>> +<br>> static const struct rc_src_register srcreg_undefined = {<br>
>         .File = RC_FILE_NONE,<br>>         .Index = 0,<br>> @@ -416,6 +423,52 @@ static void transform_POW(struct radeon_compiler* c,<br>>         rc_remove_instruction(inst);<br>> }<br>> <br>> +/* dst = ROUND(src) :<br>
> + * frac = FRC(src)<br>> + * low = src - frac<br>> + * high = low + 1<br>> + * dst = CND high, low, frac<br>> + *<br>> + * According to the GLSL spec, the implementor can decide which way to round<br>
> + * when the fraction is .5. In this case we round down, so we can use<br>> + * CND and save an instruction.<br>> + *<br>> + * The optimizer should reduce this sequence to 3 instructions using<br>> + * presubtract.<br>
> + */<br>> +static void transform_ROUND(struct radeon_compiler* c,<br>> +        struct rc_instruction* inst)<br>> +{<br>> +        unsigned int mask = inst->U.I.DstReg.WriteMask;<br>> +        unsigned int frac_index, low_index, high_index;<br>
> +        struct rc_dst_register frac_dst, low_dst, high_dst;<br>> +        struct rc_src_register frac_src, low_src, high_src;<br>> +<br>> +        /* frac = FRC(src) */<br>> +        frac_index = rc_find_free_temporary(c);<br>> +        frac_dst = dstregtmpmask(frac_index, mask);<br>
> +        emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, inst->U.I.SrcReg[0]);<br>> +        frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index);<br>> +<br>> +        /* low = src - frc */<br>> +        low_index = rc_find_free_temporary(c);<br>
> +        low_dst = dstregtmpmask(low_index, mask);<br>> +        emit2(c, inst->Prev, RC_OPCODE_ADD, 0, low_dst,<br>> +                                        negate(inst->U.I.SrcReg[0]), frac_src);<br>> +        low_src = srcreg(RC_FILE_TEMPORARY, low_dst.Index);<br>
> +<br>> +        /* high = low + 1 */<br>> +        high_index = rc_find_free_temporary(c);<br>> +        high_dst = dstregtmpmask(high_index, mask);<br>> +        emit2(c, inst->Prev, RC_OPCODE_ADD, 0, high_dst, low_src, builtin_one);<br>
> +        high_src = srcreg(RC_FILE_TEMPORARY, high_dst.Index);<br>> +<br>> +        /* dst = CND high, low, frac */<br>> +        emit3(c, inst->Prev, RC_OPCODE_CND, 0, inst->U.I.DstReg,<br>> +                                                high_src, low_src, frac_src);<br>
> +        rc_remove_instruction(inst);<br>> +}<br>> +<br>> static void transform_RSQ(struct radeon_compiler* c,<br>>         struct rc_instruction* inst)<br>> {<br>> @@ -599,6 +652,7 @@ int radeonTransformALU(<br>
>         case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;<br>>         case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;<br>>         case RC_OPCODE_POW: transform_POW(c, inst); return 1;<br>> +        case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1;<br>
>         case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;<br>>         case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;<br>>         case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;<br>> diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c<br>
> index 07a3f3c..4cb08b5 100644<br>> --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c<br>> +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c<br>> @@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode)<br>
> case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;<br>> case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP;<br>> case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;<br>> - /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */<br>
> + case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND;<br>> case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;<br>> case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;<br>> case TGSI_OPCODE_POW: return RC_OPCODE_POW;<br>
> -- <br>> 1.7.3.4<br>> <br>> _______________________________________________<br>> mesa-dev mailing list<br>> <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>> <a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</div>