[Mesa-dev] [PATCH v3 1/4] nv50/ir: add preliminary support for OP_XMAD
Rhys Perry
pendingchaos02 at gmail.com
Mon Jul 23 10:40:28 UTC 2018
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
src/gallium/drivers/nouveau/codegen/nv50_ir.h | 26 ++++++++++++++++++++++
.../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 18 +++++++++++++--
.../drivers/nouveau/codegen/nv50_ir_print.cpp | 19 ++++++++++++++++
.../drivers/nouveau/codegen/nv50_ir_target.cpp | 7 +++---
.../nouveau/codegen/nv50_ir_target_gm107.cpp | 1 +
.../nouveau/codegen/nv50_ir_target_nv50.cpp | 1 +
.../nouveau/codegen/nv50_ir_target_nvc0.cpp | 15 +++++++++++++
7 files changed, 82 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 0b220cc48d..13822a08c3 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -58,6 +58,9 @@ enum operation
OP_FMA,
OP_SAD, // abs(src0 - src1) + src2
OP_SHLADD,
+ // extended multiply-add (GM107+): 16-bit x 16-bit multiply plus addend,
+ // modified by the NV50_IR_SUBOP_XMAD_* flags. see envytools for full details
+ OP_XMAD,
OP_ABS,
OP_NEG,
OP_NOT,
@@ -256,6 +259,29 @@ enum operation
#define NV50_IR_SUBOP_MINMAX_MED 2
#define NV50_IR_SUBOP_MINMAX_HIGH 3
+// (xmad(src0, src1, 0) << 16) + src2
+#define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
+// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
+#define NV50_IR_SUBOP_XMAD_MRG (1 << 1)
+// xmad(src0, src1, src2.lo)
+#define NV50_IR_SUBOP_XMAD_CLO (1 << 2)
+// xmad(src0, src1, src2.hi)
+#define NV50_IR_SUBOP_XMAD_CHI (2 << 2)
+// if both operands to the multiplication are non-zero, subtract 65536 for each
+// negative operand
+#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2)
+// xmad(src0, src1, src2) + (src1 << 16)
+#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2)
+#define NV50_IR_SUBOP_XMAD_CMODE_SHIFT 2
+#define NV50_IR_SUBOP_XMAD_CMODE_MASK uint16_t(0x7 << NV50_IR_SUBOP_XMAD_CMODE_SHIFT)
+
+// use the high 16 bits instead of the low 16 bits for the multiplication.
+// if the instruction's sType is signed, sign extend the operand from 16 bits
+// to 32 before multiplication.
+#define NV50_IR_SUBOP_XMAD_H1_SHIFT 5
+#define NV50_IR_SUBOP_XMAD_H1(i) (1 << (NV50_IR_SUBOP_XMAD_H1_SHIFT + (i)))
+#define NV50_IR_SUBOP_XMAD_H1_MASK uint16_t(0x3 << NV50_IR_SUBOP_XMAD_H1_SHIFT)
+
enum DataType
{
TYPE_NONE,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 16022e6f23..6deea7a360 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -191,9 +191,17 @@ void
LoadPropagation::checkSwapSrc01(Instruction *insn)
{
const Target *targ = prog->getTarget();
- if (!targ->getOpInfo(insn).commutative)
- if (insn->op != OP_SET && insn->op != OP_SLCT && insn->op != OP_SUB)
+ if (!targ->getOpInfo(insn).commutative) {
+ if (insn->op != OP_SET && insn->op != OP_SLCT &&
+ insn->op != OP_SUB && insn->op != OP_XMAD)
return;
+ // XMAD is only commutative if neither the CBCC mode nor the MRG flag is set.
+ if (insn->op == OP_XMAD &&
+ (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == NV50_IR_SUBOP_XMAD_CBCC)
+ return;
+ if (insn->op == OP_XMAD && (insn->subOp & NV50_IR_SUBOP_XMAD_MRG))
+ return;
+ }
if (insn->src(1).getFile() != FILE_GPR)
return;
// This is the special OP_SET used for alphatesting, we can't reverse its
@@ -236,6 +244,12 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
if (insn->op == OP_SUB) {
insn->src(0).mod = insn->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
insn->src(1).mod = insn->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
+ } else
+ if (insn->op == OP_XMAD) {
+ // swap h1 flags
+ uint16_t h1 = (insn->subOp >> 1 & NV50_IR_SUBOP_XMAD_H1(0)) |
+ (insn->subOp << 1 & NV50_IR_SUBOP_XMAD_H1(1));
+ insn->subOp = (insn->subOp & ~NV50_IR_SUBOP_XMAD_H1_MASK) | h1;
}
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index ee3506fbae..7eab8b8d70 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] =
"fma",
"sad",
"shladd",
+ "xmad",
"abs",
"neg",
"not",
@@ -240,6 +241,11 @@ static const char *barOpStr[] =
"sync", "arrive", "red and", "red or", "red popc"
};
+static const char *xmadOpCModeStr[] =
+{
+ "clo", "chi", "csfu", "cbcc"
+};
+
static const char *DataTypeStr[] =
{
"-",
@@ -625,6 +631,19 @@ void Instruction::print() const
if (subOp < ARRAY_SIZE(barOpStr))
PRINT("%s ", barOpStr[subOp]);
break;
+ case OP_XMAD: {
+ if (subOp & NV50_IR_SUBOP_XMAD_PSL)
+ PRINT("psl ");
+ if (subOp & NV50_IR_SUBOP_XMAD_MRG)
+ PRINT("mrg ");
+ unsigned cmode = (subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
+ cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
+ if (cmode && cmode <= ARRAY_SIZE(xmadOpCModeStr))
+ PRINT("%s ", xmadOpCModeStr[cmode - 1]);
+ for (int i = 0; i < 2; i++)
+ PRINT("h%d ", (subOp & NV50_IR_SUBOP_XMAD_H1(i)) ? 1 : 0);
+ break;
+ }
default:
if (subOp)
PRINT("(SUBOP:%u) ", subOp);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 298e7c6ef9..9193a01f18 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -30,7 +30,8 @@ const uint8_t Target::operationSrcNr[] =
0, 0, // NOP, PHI
0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
1, 1, 2, // MOV, LOAD, STORE
- 2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD
+ 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
+ 3, 3, // SHLADD, XMAD
1, 1, 1, // ABS, NEG, NOT
2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
2, 2, 1, // MAX, MIN, SAT
@@ -70,10 +71,10 @@ const OpClass Target::operationClass[] =
OPCLASS_MOVE,
OPCLASS_LOAD,
OPCLASS_STORE,
- // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD
+ // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
OPCLASS_ARITH, OPCLASS_ARITH,
- OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
+ OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
// ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
OPCLASS_CONVERT, OPCLASS_CONVERT,
OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
index adbfcc3cfe..7293fb27dd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -60,6 +60,7 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
case OP_SQRT:
case OP_DIV:
case OP_MOD:
+ case OP_XMAD:
return false;
default:
break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index 1ad3467337..2981497340 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -443,6 +443,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const
case OP_EXIT: // want exit modifier instead (on NOP if required)
case OP_MEMBAR:
case OP_SHLADD:
+ case OP_XMAD:
return false;
case OP_SAD:
return ty == TYPE_S32;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 7e059235f4..7e66d2950b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -356,6 +356,18 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
if ((i->op == OP_SHL || i->op == OP_SHR) && typeSizeof(i->sType) == 8 &&
sf == FILE_MEMORY_CONST)
return false;
+ // constant buffer loads can't be used with cbcc xmads
+ if (i->op == OP_XMAD && sf == FILE_MEMORY_CONST &&
+ (i->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == NV50_IR_SUBOP_XMAD_CBCC)
+ return false;
+ // constant buffer loads for the third operand can't be used with psl/mrg xmads
+ if (i->op == OP_XMAD && sf == FILE_MEMORY_CONST && s == 2 &&
+ (i->subOp & (NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_MRG)))
+ return false;
+ // for xmads, immediates can't have the h1 flag set
+ if (i->op == OP_XMAD && sf == FILE_IMMEDIATE && s < 2 &&
+ i->subOp & NV50_IR_SUBOP_XMAD_H1(s))
+ return false;
for (int k = 0; i->srcExists(k); ++k) {
if (i->src(k).getFile() == FILE_IMMEDIATE) {
@@ -448,6 +460,8 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const
return false;
if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
return false;
+ if (op == OP_XMAD)
+ return false;
return true;
}
@@ -467,6 +481,7 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
case OP_XOR:
case OP_POPCNT:
case OP_BFIND:
+ case OP_XMAD:
break;
case OP_SET:
if (insn->sType != TYPE_F32)
--
2.14.4
More information about the mesa-dev
mailing list