[Mesa-dev] [PATCH v3 1/4] nv50/ir: add preliminary support for OP_XMAD

Karol Herbst kherbst at redhat.com
Tue Aug 14 10:40:24 UTC 2018


Okay, with that removed, the patch is Reviewed-by: Karol Herbst <kherbst at redhat.com>

On Fri, Aug 10, 2018 at 6:33 PM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
> I don't think so.
>
> On Wed, Aug 8, 2018 at 11:27 PM, Karol Herbst <kherbst at redhat.com> wrote:
>> On Mon, Jul 23, 2018 at 12:40 PM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
>>> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
>>> ---
>>>  src/gallium/drivers/nouveau/codegen/nv50_ir.h      | 26 ++++++++++++++++++++++
>>>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 18 +++++++++++++--
>>>  .../drivers/nouveau/codegen/nv50_ir_print.cpp      | 19 ++++++++++++++++
>>>  .../drivers/nouveau/codegen/nv50_ir_target.cpp     |  7 +++---
>>>  .../nouveau/codegen/nv50_ir_target_gm107.cpp       |  1 +
>>>  .../nouveau/codegen/nv50_ir_target_nv50.cpp        |  1 +
>>>  .../nouveau/codegen/nv50_ir_target_nvc0.cpp        | 15 +++++++++++++
>>>  7 files changed, 82 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>>> index 0b220cc48d..13822a08c3 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>>> @@ -58,6 +58,9 @@ enum operation
>>>     OP_FMA,
>>>     OP_SAD, // abs(src0 - src1) + src2
>>>     OP_SHLADD,
>>> +   // extended multiply-add (GM107+), does a lot of things.
>>> +   // see envytools for detailed documentation
>>> +   OP_XMAD,
>>>     OP_ABS,
>>>     OP_NEG,
>>>     OP_NOT,
>>> @@ -256,6 +259,29 @@ enum operation
>>>  #define NV50_IR_SUBOP_MINMAX_MED  2
>>>  #define NV50_IR_SUBOP_MINMAX_HIGH 3
>>>
>>> +// (xmad(src0, src1, 0) << 16) + src2
>>> +#define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
>>> +// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
>>> +#define NV50_IR_SUBOP_XMAD_MRG (1 << 1)
>>> +// xmad(src0, src1, src2.lo)
>>> +#define NV50_IR_SUBOP_XMAD_CLO (1 << 2)
>>> +// xmad(src0, src1, src2.hi)
>>> +#define NV50_IR_SUBOP_XMAD_CHI (2 << 2)
>>> +// if both operands to the multiplication are non-zero, subtract 65536 for each
>>> +// negative operand
>>> +#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2)
>>> +// xmad(src0, src1, src2) + (src1 << 16)
>>> +#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2)
>>> +#define NV50_IR_SUBOP_XMAD_CMODE_SHIFT 2
>>> +#define NV50_IR_SUBOP_XMAD_CMODE_MASK uint16_t(0x7 << NV50_IR_SUBOP_XMAD_CMODE_SHIFT)
>>> +
>>> +// use the high 16 bits instead of the low 16 bits for the multiplication.
>>> +// if the instruction's sType is signed, sign extend the operand from 16 bits
>>> +// to 32 before multiplication.
>>> +#define NV50_IR_SUBOP_XMAD_H1_SHIFT 5
>>> +#define NV50_IR_SUBOP_XMAD_H1(i) (1 << (NV50_IR_SUBOP_XMAD_H1_SHIFT + (i)))
>>> +#define NV50_IR_SUBOP_XMAD_H1_MASK uint16_t(0x3 << NV50_IR_SUBOP_XMAD_H1_SHIFT)
>>
>> Is the uint16_t cast really needed?
>>
>>> +
>>>  enum DataType
>>>  {
>>>     TYPE_NONE,
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>>> index 16022e6f23..6deea7a360 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>>> @@ -191,9 +191,17 @@ void
>>>  LoadPropagation::checkSwapSrc01(Instruction *insn)
>>>  {
>>>     const Target *targ = prog->getTarget();
>>> -   if (!targ->getOpInfo(insn).commutative)
>>> -      if (insn->op != OP_SET && insn->op != OP_SLCT && insn->op != OP_SUB)
>>> +   if (!targ->getOpInfo(insn).commutative) {
>>> +      if (insn->op != OP_SET && insn->op != OP_SLCT &&
>>> +          insn->op != OP_SUB && insn->op != OP_XMAD)
>>>           return;
>>> +      // XMAD is only commutative when neither the CBCC mode nor the MRG flag is set.
>>> +      if (insn->op == OP_XMAD &&
>>> +          (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == NV50_IR_SUBOP_XMAD_CBCC)
>>> +         return;
>>> +      if (insn->op == OP_XMAD && (insn->subOp & NV50_IR_SUBOP_XMAD_MRG))
>>> +         return;
>>> +   }
>>>     if (insn->src(1).getFile() != FILE_GPR)
>>>        return;
>>>     // This is the special OP_SET used for alphatesting, we can't reverse its
>>> @@ -236,6 +244,12 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
>>>     if (insn->op == OP_SUB) {
>>>        insn->src(0).mod = insn->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
>>>        insn->src(1).mod = insn->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
>>> +   } else
>>> +   if (insn->op == OP_XMAD) {
>>> +      // swap h1 flags
>>> +      uint16_t h1 = (insn->subOp >> 1 & NV50_IR_SUBOP_XMAD_H1(0)) |
>>> +                    (insn->subOp << 1 & NV50_IR_SUBOP_XMAD_H1(1));
>>> +      insn->subOp = (insn->subOp & ~NV50_IR_SUBOP_XMAD_H1_MASK) | h1;
>>>     }
>>>  }
>>>
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
>>> index ee3506fbae..7eab8b8d70 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
>>> @@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] =
>>>     "fma",
>>>     "sad",
>>>     "shladd",
>>> +   "xmad",
>>>     "abs",
>>>     "neg",
>>>     "not",
>>> @@ -240,6 +241,11 @@ static const char *barOpStr[] =
>>>     "sync", "arrive", "red and", "red or", "red popc"
>>>  };
>>>
>>> +static const char *xmadOpCModeStr[] =
>>> +{
>>> +   "clo", "chi", "csfu", "cbcc"
>>> +};
>>> +
>>>  static const char *DataTypeStr[] =
>>>  {
>>>     "-",
>>> @@ -625,6 +631,19 @@ void Instruction::print() const
>>>           if (subOp < ARRAY_SIZE(barOpStr))
>>>              PRINT("%s ", barOpStr[subOp]);
>>>           break;
>>> +      case OP_XMAD: {
>>> +         if (subOp & NV50_IR_SUBOP_XMAD_PSL)
>>> +            PRINT("psl ");
>>> +         if (subOp & NV50_IR_SUBOP_XMAD_MRG)
>>> +            PRINT("mrg ");
>>> +         unsigned cmode = (subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
>>> +         cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
>>> +         if (cmode && cmode <= ARRAY_SIZE(xmadOpCModeStr))
>>> +            PRINT("%s ", xmadOpCModeStr[cmode - 1]);
>>> +         for (int i = 0; i < 2; i++)
>>> +            PRINT("h%d ", (subOp & NV50_IR_SUBOP_XMAD_H1(i)) ? 1 : 0);
>>> +         break;
>>> +      }
>>>        default:
>>>           if (subOp)
>>>              PRINT("(SUBOP:%u) ", subOp);
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
>>> index 298e7c6ef9..9193a01f18 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
>>> @@ -30,7 +30,8 @@ const uint8_t Target::operationSrcNr[] =
>>>     0, 0,                   // NOP, PHI
>>>     0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
>>>     1, 1, 2,                // MOV, LOAD, STORE
>>> -   2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD
>>> +   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
>>> +   3, 3,                   // SHLADD, XMAD
>>>     1, 1, 1,                // ABS, NEG, NOT
>>>     2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
>>>     2, 2, 1,                // MAX, MIN, SAT
>>> @@ -70,10 +71,10 @@ const OpClass Target::operationClass[] =
>>>     OPCLASS_MOVE,
>>>     OPCLASS_LOAD,
>>>     OPCLASS_STORE,
>>> -   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD
>>> +   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD
>>>     OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
>>>     OPCLASS_ARITH, OPCLASS_ARITH,
>>> -   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
>>> +   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
>>>     // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
>>>     OPCLASS_CONVERT, OPCLASS_CONVERT,
>>>     OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
>>> index adbfcc3cfe..7293fb27dd 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
>>> @@ -60,6 +60,7 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
>>>     case OP_SQRT:
>>>     case OP_DIV:
>>>     case OP_MOD:
>>> +   case OP_XMAD:
>>>        return false;
>>>     default:
>>>        break;
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>> index 1ad3467337..2981497340 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>> @@ -443,6 +443,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const
>>>     case OP_EXIT: // want exit modifier instead (on NOP if required)
>>>     case OP_MEMBAR:
>>>     case OP_SHLADD:
>>> +   case OP_XMAD:
>>>        return false;
>>>     case OP_SAD:
>>>        return ty == TYPE_S32;
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>> index 7e059235f4..7e66d2950b 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>> @@ -356,6 +356,18 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
>>>     if ((i->op == OP_SHL || i->op == OP_SHR) && typeSizeof(i->sType) == 8 &&
>>>         sf == FILE_MEMORY_CONST)
>>>        return false;
>>> +   // constant buffer loads can't be used with cbcc xmads
>>> +   if (i->op == OP_XMAD && sf == FILE_MEMORY_CONST &&
>>> +       (i->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == NV50_IR_SUBOP_XMAD_CBCC)
>>> +      return false;
>>> +   // constant buffer loads for the third operand can't be used with psl/mrg xmads
>>> +   if (i->op == OP_XMAD && sf == FILE_MEMORY_CONST && s == 2 &&
>>> +       (i->subOp & (NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_MRG)))
>>> +      return false;
>>> +   // for xmads, immediates can't have the h1 flag set
>>> +   if (i->op == OP_XMAD && sf == FILE_IMMEDIATE && s < 2 &&
>>> +       i->subOp & NV50_IR_SUBOP_XMAD_H1(s))
>>> +      return false;
>>>
>>>     for (int k = 0; i->srcExists(k); ++k) {
>>>        if (i->src(k).getFile() == FILE_IMMEDIATE) {
>>> @@ -448,6 +460,8 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const
>>>        return false;
>>>     if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
>>>        return false;
>>> +   if (op == OP_XMAD)
>>> +      return false;
>>>     return true;
>>>  }
>>>
>>> @@ -467,6 +481,7 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
>>>        case OP_XOR:
>>>        case OP_POPCNT:
>>>        case OP_BFIND:
>>> +      case OP_XMAD:
>>>           break;
>>>        case OP_SET:
>>>           if (insn->sType != TYPE_F32)
>>> --
>>> 2.14.4
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list