[Mesa-dev] [PATCH] nvc0/lowering: Handle conversions to U64/S64 manually

Sat Mar 19 22:26:13 UTC 2016

On 06:06 PM - Mar 19 2016, Ilia Mirkin wrote:
> Where are these coming from? Could you perhaps not generate them in
> the first place?

Those are coming from the generated SPIR-V, of the following kernel for
example:

    __kernel void global_id(__global int * out)
    {
      unsigned id = get_global_id(0);
      out[id] = id;
    }

But I don't see any reason why there should be cvt generated in this case. I'll
have to investigate the SPIR-V generation.

However, you could have some `long bar; char foo = convert_char_sat(bar);` in
the OpenCL kernel.

> 
> On Sat, Mar 19, 2016 at 5:56 PM, Pierre Moreau <pierre.morrow at free.fr> wrote:
> > Generating a `cvt u32 $r0 u64 $r1d` or a `cvt u64 $r0d u32 $r2` makes the GPU
> > unhappy. Instead, manually handle the conversion between 64-bit and 32-bit
> > values, and use `cvt` to convert between the original target (resp. source)
> > and 32-bit value. This happens to be the behaviour of NVIDIA's driver.
> >
> > Signed-off-by: Pierre Moreau <pierre.morrow at free.fr>
> > ---
> >  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 59 ++++++++++++++++++++++
> >  .../nouveau/codegen/nv50_ir_lowering_nvc0.h        |  1 +
> >  2 files changed, 60 insertions(+)
> >
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> > index 2719f2c..c419a68 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> > @@ -1859,6 +1859,63 @@ NVC0LoweringPass::handleOUT(Instruction *i)
> >     return true;
> >  }
> >
> > +bool
> > +NVC0LoweringPass::handleCVT(Instruction *i)
> > +{
> > +   if (isFloatType(i->dType) || isFloatType(i->sType) ||
> > +         isSignedIntType(i->dType) xor isSignedIntType(i->sType))
> > +      return false;
> > +
> > +   if (typeSizeof(i->sType) == 8) {
> > +      Value *toSplit = i->getSrc(0);
> > +      if (i->saturate) {
> > +         Value *minValue = bld.loadImm(bld.getSSA(8), 0ul);
> > +         Value *maxValue = bld.loadImm(bld.getSSA(8), UINT32_MAX);
> > +         if (isSignedType(i->sType)) {
> > +            minValue = bld.loadImm(bld.getSSA(8), INT32_MIN);
> > +            maxValue = bld.loadImm(bld.getSSA(8), INT32_MAX);
> > +         }
> > +         Value *minRes = bld.mkOp2v(OP_MAX, i->sType, bld.getSSA(8), toSplit,
> > +                                    minValue);
> > +         toSplit = bld.mkOp2v(OP_MIN, i->sType, bld.getSSA(8), minRes,
> > +                              maxValue);
> > +      }
> > +
> > +      Value *value32[2] = { bld.getSSA(), bld.getSSA() };
> > +      bld.mkSplit(value32, 4, toSplit);
> > +      if (typeSizeof(i->dType) == 4) {
> > +         bld.mkMov(i->getDef(0), value32[0], i->dType);
> > +         delete_Instruction(prog, i);
> > +         return true;
> > +      }
> > +
> > +      i->setSrc(0, bld.getSSA());
> > +      i->sType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
> > +      bld.mkMov(i->getSrc(0), value32[0], i->sType);
> > +   } else if (typeSizeof(i->dType) == 8) {
> > +      bld.setPosition(i, true);
> > +      Value *res = i->getDef(0);
> > +      Value *high32 = bld.loadImm(bld.getSSA(),
> > +                                  isSignedType(i->sType) ? UINT32_MAX : 0u);
> > +      Value *low32 = i->getSrc(0);
> > +      DataType resType = i->dType;
> > +
> > +      if (typeSizeof(i->sType) <= 2) {
> > +         i->dType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
> > +         low32 = bld.getSSA();
> > +         i->setDef(0, low32);
> > +      } else if (typeSizeof(i->sType) == 4) {
> > +         delete_Instruction(prog, i);
> > +      }
> > +
> > +      Value *merged64 = bld.mkOp2v(OP_MERGE, resType, bld.getSSA(8), low32,
> > +                                   high32);
> > +      bld.mkMov(res, merged64, resType);
> > +   }
> > +
> > +   return true;
> > +}
> > +
> >  // Generate a binary predicate if an instruction is predicated by
> >  // e.g. an f32 value.
> >  void
> > @@ -1894,6 +1951,8 @@ NVC0LoweringPass::visit(Instruction *i)
> >        checkPredicate(i);
> >
> >     switch (i->op) {
> > +   case OP_CVT:
> > +      return handleCVT(i);
> >     case OP_TEX:
> >     case OP_TXB:
> >     case OP_TXL:
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> > index 6eb8aff..9fc24d9 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> > @@ -96,6 +96,7 @@ protected:
> >     bool handleMOD(Instruction *);
> >     bool handleSQRT(Instruction *);
> >     bool handlePOW(Instruction *);
> > +   bool handleCVT(Instruction *);
> >     bool handleTEX(TexInstruction *);
> >     bool handleTXD(TexInstruction *);
> >     bool handleTXQ(TexInstruction *);
> > --
> > 2.7.4
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160319/00acb691/attachment.sig>