[Mesa-dev] [PATCH v2 1/2] nv50/ir: optimize signed integer modulo by pow-of-2

Tobias Klausmann tobias.johannes.klausmann at mni.thm.de
Sun Nov 12 14:09:53 UTC 2017


On 11/12/17 3:53 AM, Ilia Mirkin wrote:
> It's common to use signed int modulo in GLSL. As it happens, the GLSL
> specs allow the result to be undefined, but that seems fairly
> surprising. It's not that much more effort to get it right, at least for
> positive modulo operators.
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
>
> v1 -> v2:
>
>   - fix SHLADD folding with relaxed isPow2 function
>   - use correct FILE_PREDICATE/FILE_FLAGS variant on nv50/nvc0
>
>   src/gallium/drivers/nouveau/codegen/nv50_ir.cpp    |  8 +------
>   .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 28 +++++++++++++++++++---
>   2 files changed, 26 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> index 4076177e56d..657784163b3 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> @@ -429,13 +429,7 @@ ImmediateValue::isNegative() const
>   bool
>   ImmediateValue::isPow2() const
>   {
> -   switch (reg.type) {
> -   case TYPE_U8:
> -   case TYPE_U16:
> -   case TYPE_U32: return util_is_power_of_two(reg.data.u32);
> -   default:
> -      return false;
> -   }
> +   return util_is_power_of_two(reg.data.u32);
>   }


maybe go back to special casing UINT/INT again here, instead of fixing 
up SHLADD in the next hunk?


>   
>   void
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 7e4e193e3d2..5028fd71951 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -1054,6 +1054,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
>            i->op = OP_ADD;
>         } else
>         if (s == 1 && !imm0.isNegative() && imm0.isPow2() &&
> +          !isFloatType(i->dType) &&
>             target->isOpSupported(OP_SHLADD, i->dType)) {
>            i->op = OP_SHLADD;
>            imm0.applyLog2();
> @@ -1163,10 +1164,31 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
>         break;
>   
>      case OP_MOD:
> -      if (i->sType == TYPE_U32 && imm0.isPow2()) {
> +      if (s == 1 && imm0.isPow2()) {
>            bld.setPosition(i, false);
> -         i->op = OP_AND;
> -         i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1));
> +         if (i->sType == TYPE_U32) {
> +            i->op = OP_AND;
> +            i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1));
> +         } else if (i->sType == TYPE_S32) {
> +            // Do it on the absolute value of the input, and then restore the
> +            // sign. The only odd case is MIN_INT, but that should work out
> +            // as well, since MIN_INT mod any power of 2 is 0.
> +            //
> +            // Technically we don't have to do any of this since MOD is
> +            // undefined with negative arguments in GLSL, but this seems like
> +            // the nice thing to do.
> +            Value *abs = bld.mkOp1v(OP_ABS, TYPE_S32, bld.getSSA(), i->getSrc(0));
> +            Value *neg, *v1, *v2;
> +            bld.mkCmp(OP_SET, CC_LT, TYPE_S32, (neg = bld.getSSA(1, prog->getTarget()->nativeFile(FILE_PREDICATE))), TYPE_S32, i->getSrc(0), bld.loadImm(NULL, 0));
> +            Value *mod = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), abs, bld.loadImm(NULL, imm0.reg.data.u32 - 1));


have a few linebreakes here, but i don't have to tell ya that (still 
considered experimental? :D)


> +            bld.mkOp1(OP_NEG, TYPE_S32, (v1 = bld.getSSA()), mod)
> +               ->setPredicate(CC_P, neg);
> +            bld.mkOp1(OP_MOV, TYPE_S32, (v2 = bld.getSSA()), mod)
> +               ->setPredicate(CC_NOT_P, neg);
> +            newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2);
> +
> +            delete_Instruction(prog, i);
> +         }
>         }
>         break;
>   


The patch looks fine otherwise, i will test it a bit later and see if i 
can trigger something else with it!

Greetings,

Tobias



More information about the mesa-dev mailing list