[Mesa-dev] [PATCH 2/2] nvc0/ir: handle a load's reg result not being used for locked variants

Samuel Pitoiset samuel.pitoiset at gmail.com
Thu May 26 15:49:59 UTC 2016


What about the Maxwell logic? LDS expects a GPR at def(0) and in case 
the dst reg doesn't exist, you move the predicate to def(0).

Are you sure you don't need to check if dst(0) is not a predicate in 
emitLDS()?

On 05/26/2016 04:44 AM, Ilia Mirkin wrote:
> For a load locked, we might not use the first result but the second
> result is the predicate result of the locking. In that case the load
> splitting logic doesn't apply (which is designed for splitting 128-bit
> loads). Instead we take the predicate and move it into the first
> position (as having a dead result in first def's position upsets all
> sorts of things including RA). Update the emitters to deal with this as
> well.
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
>  .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 20 ++++++++++++++---
>  .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  | 26 +++++++++++++++++-----
>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 10 +++++++--
>  3 files changed, 45 insertions(+), 11 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> index 6a5981d..27d9b8e 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> @@ -2080,15 +2080,29 @@ CodeEmitterGK110::emitLOAD(const Instruction *i)
>     code[1] |= offset >> 9;
>
>     // Locked store on shared memory can fail.
> +   int r = 0, p = -1;
>     if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
>         i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
> -      assert(i->defExists(1));
> -      defId(i->def(1), 32 + 16);
> +      if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
> +         r = -1;
> +         p = 0;
> +      } else if (i->defExists(1)) { // r, p
> +         p = 1;
> +      } else {
> +         assert(!"Expected predicate dest for load locked");
> +      }
>     }
>
>     emitPredicate(i);
>
> -   defId(i->def(0), 2);
> +   if (r >= 0)
> +      defId(i->def(r), 2);
> +   else
> +      code[0] |= 255 << 2;
> +
> +   if (p >= 0)
> +      defId(i->def(p), 32 + 16);
> +
>     if (i->getIndirect(0, 0)) {
>        srcId(i->src(0).getIndirect(0), 10);
>        if (i->getIndirect(0, 0)->reg.size == 8)
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> index 596293e..1bb962f 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> @@ -1874,17 +1874,31 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
>     }
>     code[1] = opc;
>
> +   int r = 0, p = -1;
>     if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
>        if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
> -         assert(i->defExists(1));
> -         if (targ->getChipset() >= NVISA_GK104_CHIPSET)
> -            defId(i->def(1), 8);
> -         else
> -            defId(i->def(1), 32 + 18);
> +         if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
> +            r = -1;
> +            p = 0;
> +         } else if (i->defExists(1)) { // r, p
> +            p = 1;
> +         } else {
> +            assert(!"Expected predicate dest for load locked");
> +         }
>        }
>     }
>
> -   defId(i->def(0), 14);
> +   if (r >= 0)
> +      defId(i->def(r), 14);
> +   else
> +      code[0] |= 63 << 14;
> +
> +   if (p >= 0) {
> +      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
> +         defId(i->def(p), 8);
> +      else
> +         defId(i->def(p), 32 + 18);
> +   }
>
>     setAddressByFile(i->src(0));
>     srcId(i->src(0).getIndirect(0), 20);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index cd801f3..3213188 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -3265,14 +3265,20 @@ DeadCodeElim::visit(BasicBlock *bb)
>           ++deadCount;
>           delete_Instruction(prog, i);
>        } else
> -      if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) {
> +      if (i->defExists(1) &&
> +          i->subOp == 0 &&
> +          (i->op == OP_VFETCH || i->op == OP_LOAD)) {
>           checkSplitLoad(i);
>        } else
>        if (i->defExists(0) && !i->getDef(0)->refCount()) {
>           if (i->op == OP_ATOM ||
>               i->op == OP_SUREDP ||
> -             i->op == OP_SUREDB)
> +             i->op == OP_SUREDB) {
>              i->setDef(0, NULL);
> +         } else if (i->op == OP_LOAD && i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
> +            i->setDef(0, i->getDef(1));
> +            i->setDef(1, NULL);
> +         }
>        }
>     }
>     return true;
>

-- 
-Samuel


More information about the mesa-dev mailing list