[Mesa-dev] [PATCH v3 2/4] gm107/ir: add support for OP_XMAD on GM107+
Rhys Perry
pendingchaos02 at gmail.com
Fri Aug 10 18:36:05 UTC 2018
Yeah, "immediate = false;" looks incorrect.
Looking at nvdisasm and the placement of XMAD's various atoms, it seems it
can only have 16-bit unsigned immediates, so envydis and the patches
should probably be updated.
As for how multiplication by immediates works with mul/mad -> XMAD
conversion, the actual conversion should be done before LoadPropagation
(so the conversion doesn't have to worry about immediates and is free of
IMUL/IMAD's limitations), and the immediates should be propagated later
when possible.
After adjusting emitXMAD and TargetNVC0::insnCanLoad, I think the problem
would be solved.
On Thu, Aug 9, 2018 at 11:32 PM, Karol Herbst <kherbst at redhat.com> wrote:
> On Mon, Jul 23, 2018 at 12:40 PM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
>> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
>> ---
>> .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 65 ++++++++++++++++++++++
>> .../nouveau/codegen/nv50_ir_target_gm107.cpp | 6 +-
>> .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 +
>> 3 files changed, 71 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
>> index 1d31f181e4..c3d7be0f0e 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
>> @@ -156,6 +156,7 @@ private:
>> void emitIMUL();
>> void emitIMAD();
>> void emitISCADD();
>> + void emitXMAD();
>> void emitIMNMX();
>> void emitICMP();
>> void emitISET();
>> @@ -1892,6 +1893,67 @@ CodeEmitterGM107::emitISCADD()
>> emitGPR (0x00, insn->def(0));
>> }
>>
>> +void
>> +CodeEmitterGM107::emitXMAD()
>> +{
>> + assert(insn->src(0).getFile() == FILE_GPR);
>> +
>> + bool constbuf = false;
>> + bool psl_mrg = true;
>> + bool immediate = false;
>> + if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
>> + assert(insn->src(1).getFile() == FILE_GPR);
>> + constbuf = true;
>> + psl_mrg = false;
>> + emitInsn(0x51000000);
>> + emitGPR(0x27, insn->src(1));
>> + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
>> + } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
>> + assert(insn->src(2).getFile() == FILE_GPR);
>> + constbuf = true;
>> + emitInsn(0x4e000000);
>> + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
>> + emitGPR(0x27, insn->src(2));
>> + } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
>> + assert(insn->src(2).getFile() == FILE_GPR);
>> + assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
>> + immediate = false;
>
> has to be immediate = true;
>
>> + emitInsn(0x36000000);
>> + emitIMMD(0x14, 19, insn->src(1));
>
> we can only do 16 bit sized immediates with XMAD I think. I think we
> also have to adjust the target so that those don't get load
> propagated? How does this work out for mul/mad -> XMAD conversions
> anyway? We might want to recheck that we actually do the right thing
> there (or maybe it doesn't come up; still, it would be nice to
> fix it inside the target in case it is actually buggy).
>
>> + emitGPR(0x27, insn->src(2));
>> + } else {
>> + assert(insn->src(1).getFile() == FILE_GPR);
>> + assert(insn->src(2).getFile() == FILE_GPR);
>> + emitInsn(0x5b000000);
>> + emitGPR(0x14, insn->src(1));
>> + emitGPR(0x27, insn->src(2));
>> + }
>> +
>> + if (psl_mrg)
>> + emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
>> +
>> + unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
>> + cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
>> + emitField(0x32, constbuf ? 2 : 3, cmode);
>> +
>> + emitX(constbuf ? 0x36 : 0x26);
>> + emitCC(0x2f);
>> +
>> + emitGPR(0x0, insn->def(0));
>> + emitGPR(0x8, insn->src(0));
>> +
>> + // source flags
>> + if (isSignedType(insn->sType)) {
>> + uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
>> + emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
>> + }
>> + emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
>> + if (!immediate) {
>> + bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
>> + emitField(constbuf ? 0x34 : 0x23, 1, h1);
>> + }
>> +}
>> +
>> void
>> CodeEmitterGM107::emitIMNMX()
>> {
>> @@ -3266,6 +3328,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
>> case OP_SHLADD:
>> emitISCADD();
>> break;
>> + case OP_XMAD:
>> + emitXMAD();
>> + break;
>> case OP_MIN:
>> case OP_MAX:
>> if (isFloatType(insn->dType)) {
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
>> index 7293fb27dd..bb1c234c43 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
>> @@ -60,8 +60,11 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
>> case OP_SQRT:
>> case OP_DIV:
>> case OP_MOD:
>> - case OP_XMAD:
>> return false;
>> + case OP_XMAD:
>> + if (isFloatType(ty))
>> + return false;
>> + break;
>> default:
>> break;
>> }
>> @@ -231,6 +234,7 @@ TargetGM107::getLatency(const Instruction *insn) const
>> case OP_SUB:
>> case OP_VOTE:
>> case OP_XOR:
>> + case OP_XMAD:
>> if (insn->dType != TYPE_F64)
>> return 6;
>> break;
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>> index 7e66d2950b..5257f353e4 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>> @@ -161,6 +161,7 @@ static const struct opProperties _initPropsGM107[] = {
>> { OP_SUSTP, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
>> { OP_SUREDB, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
>> { OP_SUREDP, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
>> + { OP_XMAD, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
>> };
>>
>> void TargetNVC0::initProps(const struct opProperties *props, int size)
>> --
>> 2.14.4
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list