[Mesa-dev] [PATCH v4] nvc0/ir: replace cvt instructions with add to improve shader performance
Ilia Mirkin
imirkin at alum.mit.edu
Tue Jan 29 03:44:57 UTC 2019
Reviewed-by: Ilia Mirkin <imirkin at alum.mit.edu>
On Mon, Jan 28, 2019 at 5:47 PM Karol Herbst <kherbst at redhat.com> wrote:
>
> gives me a performance boost of 0.2% in pixmark_piano on my gk106, gm204 and
> gp107.
>
> reduces the amount of generated convert instructions by roughly 30% in
> shader-db.
>
> v2: only for 32 bit operations
> move some common code out of the switch
> handle OP_SAT with modifiers
> v3: only for registers and const memory
> rework if clauses
> merge isCvt into this patch
> v4: merge isCvt into its use
>
> Signed-off-by: Karol Herbst <kherbst at redhat.com>
> ---
> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 64 +++++++++++++++++++
> .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 +
> 2 files changed, 65 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 295497be2f9..1d122d1ebdc 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -719,6 +719,67 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
> bb->remove(bb->getEntry());
> }
>
> +// replaces instructions which would end up as f2f or i2i with faster
> +// alternatives:
> +// - abs(a) -> add(0, abs a)
> +// - fneg(a) -> fadd(neg 0, neg a)
> +// - ineg(a) -> iadd(0, neg a)
> +// - fneg(abs a) -> fadd(neg 0, neg abs a)
> +// - ineg(abs a) -> iadd(0, neg abs a)
> +// - sat(a) -> sat add(0, a)
> +void
> +NVC0LegalizePostRA::replaceCvt(Instruction *cvt)
> +{
> + if (!isFloatType(cvt->sType) && typeSizeof(cvt->sType) != 4)
> + return;
> + if (cvt->sType != cvt->dType)
> + return;
> + // we could make it work, but in this case we have optimizations disabled
> + // and we don't really care either way.
> + if (cvt->src(0).getFile() != FILE_GPR &&
> + cvt->src(0).getFile() != FILE_MEMORY_CONST)
> + return;
> +
> + Modifier mod0, mod1;
> +
> + switch (cvt->op) {
> + case OP_ABS:
> + if (cvt->src(0).mod)
> + return;
> + if (!isFloatType(cvt->sType))
> + return;
> + mod0 = 0;
> + mod1 = NV50_IR_MOD_ABS;
> + break;
> + case OP_NEG:
> + if (!isFloatType(cvt->sType) && cvt->src(0).mod)
> + return;
> + if (isFloatType(cvt->sType) &&
> + (cvt->src(0).mod && cvt->src(0).mod != Modifier(NV50_IR_MOD_ABS)))
> + return;
> +
> + mod0 = isFloatType(cvt->sType) ? NV50_IR_MOD_NEG : 0;
> + mod1 = cvt->src(0).mod == Modifier(NV50_IR_MOD_ABS) ?
> + NV50_IR_MOD_NEG_ABS : NV50_IR_MOD_NEG;
> + break;
> + case OP_SAT:
> + if (!isFloatType(cvt->sType) && cvt->src(0).mod.abs())
> + return;
> + mod0 = 0;
> + mod1 = cvt->src(0).mod;
> + cvt->saturate = true;
> + break;
> + default:
> + return;
> + }
> +
> + cvt->op = OP_ADD;
> + cvt->moveSources(0, 1);
> + cvt->setSrc(0, rZero);
> + cvt->src(0).mod = mod0;
> + cvt->src(1).mod = mod1;
> +}
> +
> bool
> NVC0LegalizePostRA::visit(BasicBlock *bb)
> {
> @@ -758,6 +819,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
> next = hi;
> }
>
> + if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS)
> + replaceCvt(i);
> +
> if (i->op != OP_MOV && i->op != OP_PFETCH)
> replaceZero(i);
> }
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> index e0f50ab0904..4679c56471b 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> @@ -81,6 +81,7 @@ private:
> virtual bool visit(Function *);
> virtual bool visit(BasicBlock *);
>
> + void replaceCvt(Instruction *);
> void replaceZero(Instruction *);
> bool tryReplaceContWithBra(BasicBlock *);
> void propagateJoin(BasicBlock *);
> --
> 2.20.1
>
More information about the mesa-dev mailing list