[Mesa-dev] [PATCH v2 1/5] nv50/ir: add preliminary support for OP_XMAD

Wed Jul 18 18:04:02 UTC 2018

uint16_t

On Wed, Jul 18, 2018 at 7:05 PM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir.h      | 23 ++++++++++++++++++++++
>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 17 ++++++++++++++--
>  .../drivers/nouveau/codegen/nv50_ir_print.cpp      | 18 +++++++++++++++++
>  .../drivers/nouveau/codegen/nv50_ir_target.cpp     |  7 ++++---
>  .../nouveau/codegen/nv50_ir_target_gm107.cpp       |  1 +
>  .../nouveau/codegen/nv50_ir_target_nv50.cpp        |  1 +
>  .../nouveau/codegen/nv50_ir_target_nvc0.cpp        | 15 ++++++++++++++
>  7 files changed, 77 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> index 0b220cc48d..9798e98a1a 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> @@ -58,6 +58,9 @@ enum operation
>     OP_FMA,
>     OP_SAD, // abs(src0 - src1) + src2
>     OP_SHLADD,
> +   // extended multiply-add (GM107+), does a lot of things.
> +   // see envytools for detailed documentation
> +   OP_XMAD,
>     OP_ABS,
>     OP_NEG,
>     OP_NOT,
> @@ -256,6 +259,26 @@ enum operation
>  #define NV50_IR_SUBOP_MINMAX_MED  2
>  #define NV50_IR_SUBOP_MINMAX_HIGH 3
>
> +// xmad(src0, src1, 0) << 16 + src2
> +#define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
> +// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
> +#define NV50_IR_SUBOP_XMAD_MRG (1 << 1)
> +// xmad(src0, src1, src2.lo)
> +#define NV50_IR_SUBOP_XMAD_CLO (1 << 2)
> +// xmad(src0, src1, src2.hi)
> +#define NV50_IR_SUBOP_XMAD_CHI (2 << 2)
> +// if both operands to the multiplication are non-zero, subtract 65536 for each
> +// negative operand
> +#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2)
> +// xmad(src0, src1, src2) + src1 << 16
> +#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2)
> +#define NV50_IR_SUBOP_XMAD_CMODE_MASK (0x7 << 2)
> +

yeah, this looks nice and quite understandable!

> +// use the high 16 bits instead of the low 16 bits for the multiplication.
> +// if the instruction's sType is signed, sign extend the operand from 16 bits
> +// to 32 before multiplication.
> +#define NV50_IR_SUBOP_XMAD_H1(i) (1 << (6 + (i)))

wouldn't it be enough to do (1 << (5 + (i)))?

> +
>  enum DataType
>  {
>     TYPE_NONE,
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 4e08cfadec..5fc1fba970 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -191,9 +191,16 @@ void
>  LoadPropagation::checkSwapSrc01(Instruction *insn)
>  {
>     const Target *targ = prog->getTarget();
> -   if (!targ->getOpInfo(insn).commutative)
> -      if (insn->op != OP_SET && insn->op != OP_SLCT && insn->op != OP_SUB)
> +   if (!targ->getOpInfo(insn).commutative) {NV50_IR_SUBOP_XMAD_CBCC
> +      if (insn->op != OP_SET && insn->op != OP_SLCT &&
> +          insn->op != OP_SUB && insn->op != OP_XMAD)
>           return;
> +      // XMAD is only commutative if both the CBCC and MRG flags are not set.
> +      if (insn->op == OP_XMAD && (insn->subOp & 0x1c) == NV50_IR_SUBOP_XMAD_CBCC)
> +         return;
> +      if (insn->op == OP_XMAD && (insn->subOp & NV50_IR_SUBOP_XMAD_MRG))
> +         return;

maybe it makes sense to make both check look a like. And I think
NV50_IR_SUBOP_XMAD_CMODE_MASK should be used instead of 0x1c.

> +   }
>     if (insn->src(1).getFile() != FILE_GPR)
>        return;
>     // This is the special OP_SET used for alphatesting, we can't reverse its
> @@ -236,6 +243,12 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
>     if (insn->op == OP_SUB) {
>        insn->src(0).mod = insn->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
>        insn->src(1).mod = insn->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
> +   } else
> +   if (insn->op == OP_XMAD) {
> +      // swap h1 flags
> +      uint16_t h1 = (insn->subOp >> 6) & 0x3;
> +      h1 = (h1 >> 1 & 0x1) | (h1 << 1 & 0x2);
> +      insn->subOp = (insn->subOp & ~uint16_t(0x3 << 6)) | (h1 << 6);

I think I would prefer this code being less magic numbers and more
constants, but I am not able to come up with a nice way of doing it
macros either :/

but I think you can skip on moving the bits alltogether:

uint16_t h1 = insn->subOp & 0xc0;
h1 = (h1 >> 1 & 0x40) | (h1 << 1 & 0x80);
insn->subOp = (insn->subOp & ~(0x3 << 6)) | h1;

and then we can probably get rid of the first mask as well:

uint16_t h1 = (insn->subOp >> 1 & 0x40) | (insn->subOp << 1 & 0x80);
insn->subOp = (insn->subOp & ~(0x3 << 6)) | h1;

and maybe use the above macros?

uint16_t h1 = (insn->subOp >> 1 & NV50_IR_SUBOP_XMAD_H1(0) |
(insn->subOp << 1 & NV50_IR_SUBOP_XMAD_H1(1));
insn->subOp = (insn->subOp & ~(0x3 << 6)) | h1;

still looks a bit weird with the 0x3 << 6 thing though, but I prefer
to have no magic numbers here. Maybe have a macro for selecting all H1
bits as well?

>     }
>  }
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> index ee3506fbae..dc27674369 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> @@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] =
>     "fma",
>     "sad",
>     "shladd",
> +   "xmad",
>     "abs",
>     "neg",
>     "not",
> @@ -239,6 +240,11 @@ static const char *barOpStr[] =
>  {
>     "sync", "arrive", "red and", "red or", "red popc"
>  };
> +
> +static const char *xmadOpCModeStr[] =
> +{
> +   "clo", "chi", "csfu", "cbcc"
> +};
>
>  static const char *DataTypeStr[] =
>  {
> @@ -625,6 +631,18 @@ void Instruction::print() const
>           if (subOp < ARRAY_SIZE(barOpStr))
>              PRINT("%s ", barOpStr[subOp]);
>           break;
> +      case OP_XMAD: {
> +         if (subOp & NV50_IR_SUBOP_XMAD_PSL)
> +            PRINT("psl ");
> +         if (subOp & NV50_IR_SUBOP_XMAD_MRG)
> +            PRINT("mrg ");
> +         unsigned cmode = (subOp >> 2) & 0x7;

better use (subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) >> 2

> +         if (cmode && cmode <= ARRAY_SIZE(xmadOpCModeStr))
> +            PRINT("%s ", xmadOpCModeStr[cmode - 1]);
> +         for (int i = 0; i < 2; i++)
> +            PRINT("h%d ", (subOp & (i)) ? 1 : 0);
> +         break;
> +      }
>        default:
>           if (subOp)
>              PRINT("(SUBOP:%u) ", subOp);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
> index 298e7c6ef9..9193a01f18 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
> @@ -30,7 +30,8 @@ const uint8_t Target::operationSrcNr[] =
>     0, 0,                   // NOP, PHI
>     0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
>     1, 1, 2,                // MOV, LOAD, STORE
> -   2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD
> +   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
> +   3, 3,                   // SHLADD, XMAD
>     1, 1, 1,                // ABS, NEG, NOT
>     2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
>     2, 2, 1,                // MAX, MIN, SAT
> @@ -70,10 +71,10 @@ const OpClass Target::operationClass[] =
>     OPCLASS_MOVE,
>     OPCLASS_LOAD,
>     OPCLASS_STORE,
> -   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD
> +   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD
>     OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
>     OPCLASS_ARITH, OPCLASS_ARITH,
> -   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
> +   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
>     // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
>     OPCLASS_CONVERT, OPCLASS_CONVERT,
>     OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> index 04cbd402a1..24a1cbb8da 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> @@ -60,6 +60,7 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
>     case OP_SQRT:
>     case OP_DIV:
>     case OP_MOD:
> +   case OP_XMAD:
>        return false;
>     default:
>        break;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> index 1ad3467337..2981497340 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> @@ -443,6 +443,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const
>     case OP_EXIT: // want exit modifier instead (on NOP if required)
>     case OP_MEMBAR:
>     case OP_SHLADD:
> +   case OP_XMAD:
>        return false;
>     case OP_SAD:
>        return ty == TYPE_S32;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> index 7e059235f4..94e98ada5e 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> @@ -356,6 +356,18 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
>     if ((i->op == OP_SHL || i->op == OP_SHR) && typeSizeof(i->sType) == 8 &&
>         sf == FILE_MEMORY_CONST)
>        return false;
> +   // constant buffer loads can't be used with cbcc xmads
> +   if (i->op == OP_XMAD && (i->subOp & 0x1c) == NV50_IR_SUBOP_XMAD_CBCC &&
> +       sf == FILE_MEMORY_CONST)

use NV50_IR_SUBOP_XMAD_CMODE_MASK instead of 0x1c

> +      return false;
> +   // constant buffer loads for the third operand can't be used with psl/mrg xmads
> +   if (i->op == OP_XMAD && sf == FILE_MEMORY_CONST && s == 2 &&
> +       (i->subOp & (NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_MRG)))
> +      return false;
> +   // for xmads, immediates can't have the h1 flag set
> +   if (i->op == OP_XMAD && sf == FILE_IMMEDIATE && s < 2 &&
> +       i->subOp & NV50_IR_SUBOP_XMAD_H1(s))
> +      return false;
>
>     for (int k = 0; i->srcExists(k); ++k) {
>        if (i->src(k).getFile() == FILE_IMMEDIATE) {
> @@ -448,6 +460,8 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const
>        return false;
>     if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
>        return false;
> +   if (op == OP_XMAD)
> +      return false;
>     return true;
>  }
>
> @@ -467,6 +481,7 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
>        case OP_XOR:
>        case OP_POPCNT:
>        case OP_BFIND:
> +      case OP_XMAD:
>           break;
>        case OP_SET:
>           if (insn->sType != TYPE_F32)
> --
> 2.14.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev