[Mesa-dev] [PATCH v2 1/2] nv50/ir: optimize signed integer modulo by pow-of-2
Tobias Klausmann
tobias.johannes.klausmann at mni.thm.de
Sun Nov 12 14:09:53 UTC 2017
On 11/12/17 3:53 AM, Ilia Mirkin wrote:
> It's common to use signed int modulo in GLSL. As it happens, the GLSL
> specs allow the result to be undefined, but that seems fairly
> surprising. It's not that much more effort to get it right, at least for
> positive modulo operators.
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
>
> v1 -> v2:
>
> - fix SHLADD folding with relaxed isPow2 function
> - use correct FILE_PREDICATE/FILE_FLAGS variant on nv50/nvc0
>
> src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 8 +------
> .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 28 +++++++++++++++++++---
> 2 files changed, 26 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> index 4076177e56d..657784163b3 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> @@ -429,13 +429,7 @@ ImmediateValue::isNegative() const
> bool
> ImmediateValue::isPow2() const
> {
> - switch (reg.type) {
> - case TYPE_U8:
> - case TYPE_U16:
> - case TYPE_U32: return util_is_power_of_two(reg.data.u32);
> - default:
> - return false;
> - }
> + return util_is_power_of_two(reg.data.u32);
> }
maybe go back to special casing UINT/INT again here, instead of fixing
up SHLADD in the next hunk?
>
> void
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 7e4e193e3d2..5028fd71951 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -1054,6 +1054,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
> i->op = OP_ADD;
> } else
> if (s == 1 && !imm0.isNegative() && imm0.isPow2() &&
> + !isFloatType(i->dType) &&
> target->isOpSupported(OP_SHLADD, i->dType)) {
> i->op = OP_SHLADD;
> imm0.applyLog2();
> @@ -1163,10 +1164,31 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
> break;
>
> case OP_MOD:
> - if (i->sType == TYPE_U32 && imm0.isPow2()) {
> + if (s == 1 && imm0.isPow2()) {
> bld.setPosition(i, false);
> - i->op = OP_AND;
> - i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1));
> + if (i->sType == TYPE_U32) {
> + i->op = OP_AND;
> + i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1));
> + } else if (i->sType == TYPE_S32) {
> + // Do it on the absolute value of the input, and then restore the
> + // sign. The only odd case is MIN_INT, but that should work out
> + // as well, since MIN_INT mod any power of 2 is 0.
> + //
> + // Technically we don't have to do any of this since MOD is
> + // undefined with negative arguments in GLSL, but this seems like
> + // the nice thing to do.
> + Value *abs = bld.mkOp1v(OP_ABS, TYPE_S32, bld.getSSA(), i->getSrc(0));
> + Value *neg, *v1, *v2;
> + bld.mkCmp(OP_SET, CC_LT, TYPE_S32, (neg = bld.getSSA(1, prog->getTarget()->nativeFile(FILE_PREDICATE))), TYPE_S32, i->getSrc(0), bld.loadImm(NULL, 0));
> + Value *mod = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), abs, bld.loadImm(NULL, imm0.reg.data.u32 - 1));
have a few linebreakes here, but i don't have to tell ya that (still
considered experimental? :D)
> + bld.mkOp1(OP_NEG, TYPE_S32, (v1 = bld.getSSA()), mod)
> + ->setPredicate(CC_P, neg);
> + bld.mkOp1(OP_MOV, TYPE_S32, (v2 = bld.getSSA()), mod)
> + ->setPredicate(CC_NOT_P, neg);
> + newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2);
> +
> + delete_Instruction(prog, i);
> + }
> }
> break;
>
The patch looks fine otherwise, i will test it a bit later and see if i
can trigger something else with it!
Greetings,
Tobias
More information about the mesa-dev
mailing list