[Mesa-dev] [PATCH v3 2/4] gm107/ir: add support for OP_XMAD on GM107+

Fri Aug 10 21:03:41 UTC 2018

On Fri, Aug 10, 2018 at 8:36 PM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
> Yeah, "immediate = false;" looks incorrect.
>
> Looking at nvdisasm and the placement of XMAD's various atoms, it seems it
> can only have 16-bit unsigned immediates, so envydis and the patches
> should probably be updated.
>
> As for how multiplication by immediates works with mul/mad -> XMAD
> conversion, the actual conversion should be done before LoadPropagation
> (so it doesn't have to worry about them and it's free of IMUL/IMAD's
> limitations) and the immediates should be propagated when possible later.
>
> After adjusting emitXMAD and TargetNVC0::insnCanLoad, I think the problem
> would be solved.
>

My point was whether LoadPropagation would actually load 17+ bit sized
immediates into XMAD.

> On Thu, Aug 9, 2018 at 11:32 PM, Karol Herbst <kherbst at redhat.com> wrote:
>> On Mon, Jul 23, 2018 at 12:40 PM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
>>> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
>>> ---
>>>  .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 65 ++++++++++++++++++++++
>>>  .../nouveau/codegen/nv50_ir_target_gm107.cpp       |  6 +-
>>>  .../nouveau/codegen/nv50_ir_target_nvc0.cpp        |  1 +
>>>  3 files changed, 71 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
>>> index 1d31f181e4..c3d7be0f0e 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
>>> @@ -156,6 +156,7 @@ private:
>>>     void emitIMUL();
>>>     void emitIMAD();
>>>     void emitISCADD();
>>> +   void emitXMAD();
>>>     void emitIMNMX();
>>>     void emitICMP();
>>>     void emitISET();
>>> @@ -1892,6 +1893,67 @@ CodeEmitterGM107::emitISCADD()
>>>     emitGPR (0x00, insn->def(0));
>>>  }
>>>
>>> +void
>>> +CodeEmitterGM107::emitXMAD()
>>> +{
>>> +   assert(insn->src(0).getFile() == FILE_GPR);
>>> +
>>> +   bool constbuf = false;
>>> +   bool psl_mrg = true;
>>> +   bool immediate = false;
>>> +   if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
>>> +      assert(insn->src(1).getFile() == FILE_GPR);
>>> +      constbuf = true;
>>> +      psl_mrg = false;
>>> +      emitInsn(0x51000000);
>>> +      emitGPR(0x27, insn->src(1));
>>> +      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
>>> +   } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
>>> +      assert(insn->src(2).getFile() == FILE_GPR);
>>> +      constbuf = true;
>>> +      emitInsn(0x4e000000);
>>> +      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
>>> +      emitGPR(0x27, insn->src(2));
>>> +   } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
>>> +      assert(insn->src(2).getFile() == FILE_GPR);
>>> +      assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
>>> +      immediate = false;
>>
>> has to be immediate = true;
>>
>>> +      emitInsn(0x36000000);
>>> +      emitIMMD(0x14, 19, insn->src(1));
>>
>> we can only do 16 bit sized immediates with XMAD I think. I think we
>> also have to adjust the target so that larger immediates don't get load
>> propagated? How does this work out for mul/mad -> XMAD conversions
>> anyway? We might want to recheck that we actually do the right thing
>> there (or maybe it doesn't come up; still, it would be nice to
>> fix it inside the target in case it is actually buggy).
>>
>>> +      emitGPR(0x27, insn->src(2));
>>> +   } else {
>>> +      assert(insn->src(1).getFile() == FILE_GPR);
>>> +      assert(insn->src(2).getFile() == FILE_GPR);
>>> +      emitInsn(0x5b000000);
>>> +      emitGPR(0x14, insn->src(1));
>>> +      emitGPR(0x27, insn->src(2));
>>> +   }
>>> +
>>> +   if (psl_mrg)
>>> +      emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
>>> +
>>> +   unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
>>> +   cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
>>> +   emitField(0x32, constbuf ? 2 : 3, cmode);
>>> +
>>> +   emitX(constbuf ? 0x36 : 0x26);
>>> +   emitCC(0x2f);
>>> +
>>> +   emitGPR(0x0, insn->def(0));
>>> +   emitGPR(0x8, insn->src(0));
>>> +
>>> +   // source flags
>>> +   if (isSignedType(insn->sType)) {
>>> +      uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
>>> +      emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
>>> +   }
>>> +   emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
>>> +   if (!immediate) {
>>> +      bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
>>> +      emitField(constbuf ? 0x34 : 0x23, 1, h1);
>>> +   }
>>> +}
>>> +
>>>  void
>>>  CodeEmitterGM107::emitIMNMX()
>>>  {
>>> @@ -3266,6 +3328,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
>>>     case OP_SHLADD:
>>>        emitISCADD();
>>>        break;
>>> +   case OP_XMAD:
>>> +      emitXMAD();
>>> +      break;
>>>     case OP_MIN:
>>>     case OP_MAX:
>>>        if (isFloatType(insn->dType)) {
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
>>> index 7293fb27dd..bb1c234c43 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
>>> @@ -60,8 +60,11 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
>>>     case OP_SQRT:
>>>     case OP_DIV:
>>>     case OP_MOD:
>>> -   case OP_XMAD:
>>>        return false;
>>> +   case OP_XMAD:
>>> +      if (isFloatType(ty))
>>> +         return false;
>>> +      break;
>>>     default:
>>>        break;
>>>     }
>>> @@ -231,6 +234,7 @@ TargetGM107::getLatency(const Instruction *insn) const
>>>     case OP_SUB:
>>>     case OP_VOTE:
>>>     case OP_XOR:
>>> +   case OP_XMAD:
>>>        if (insn->dType != TYPE_F64)
>>>           return 6;
>>>        break;
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>> index 7e66d2950b..5257f353e4 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>> @@ -161,6 +161,7 @@ static const struct opProperties _initPropsGM107[] = {
>>>     { OP_SUSTP,   0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
>>>     { OP_SUREDB,  0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
>>>     { OP_SUREDP,  0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
>>> +   { OP_XMAD,    0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
>>>  };
>>>
>>>  void TargetNVC0::initProps(const struct opProperties *props, int size)
>>> --
>>> 2.14.4
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev