[Mesa-dev] [PATCH 4/4] gm107/ir: use scalar tex instructions where possible
Karol Herbst
kherbst at redhat.com
Mon Aug 6 19:32:32 UTC 2018
TEXS, TLDS and TLD4S are scalar variants of the tex instructions. Using
them gives RA more freedom and makes it less likely to insert silly MOVs
just to satisfy quad register constraints.
shader-db changes:
total instructions in shared programs : 5814250 -> 5748182 (-1.14%)
total gprs used in shared programs : 674425 -> 669954 (-0.66%)
total shared used in shared programs : 548832 -> 548832 (0.00%)
total local used in shared programs : 21068 -> 21068 (0.00%)
total bytes used in shared programs : 62111424 -> 61407080 (-1.13%)
local shared gpr inst bytes
helped 0 0 2859 9610 9610
hurt 0 0 418 197 197
Signed-off-by: Karol Herbst <kherbst at redhat.com>
---
.../nouveau/codegen/nv50_ir_emit_gm107.cpp | 158 ++++++++++++++++-
.../drivers/nouveau/codegen/nv50_ir_ra.cpp | 162 ++++++++++++++++++
2 files changed, 317 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 5e8c22cd54b..a2db3f2044b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -191,6 +191,7 @@ private:
void emitTEXs(int);
void emitTEX();
+ void emitTEXS();
void emitTLD();
void emitTLD4();
void emitTXD();
@@ -2656,6 +2657,103 @@ CodeEmitterGM107::emitTEXs(int pos)
emitGPR(pos);
}
+/**
+ * Translate a texture write mask into the 3-bit component selector that
+ * emitTEXS() stores at bit 0x32 of the scalar TEXS/TLDS encodings.
+ *
+ * Several masks map to the same selector (e.g. 0x1 and 0x7, 0x3 and 0xf).
+ * NOTE(review): presumably the hardware distinguishes them by whether the
+ * second destination register is present -- confirm against the ISA notes.
+ *
+ * \param mask  component write mask of the texture instruction
+ * \return the selector value; masks without an entry (0x0, 0x5, 0x6) trip
+ *         the assert and yield 0x0 when asserts are compiled out
+ */
+static uint8_t
+getTEXSMask(uint8_t mask)
+{
+   switch (mask) {
+   case 0x1: return 0x0;
+   case 0x2: return 0x1;
+   case 0x3: return 0x4;
+   case 0x4: return 0x2;
+   case 0x7: return 0x0;
+   case 0x8: return 0x3;
+   case 0x9: return 0x5;
+   case 0xa: return 0x6;
+   case 0xb: return 0x1;
+   case 0xc: return 0x7;
+   case 0xd: return 0x2;
+   case 0xe: return 0x3;
+   case 0xf: return 0x4;
+   default:
+      assert(!"invalid mask");
+      // Without this return a release build (NDEBUG) would run off the end
+      // of a non-void function, which is undefined behaviour.
+      return 0x0;
+   }
+}
+
+/**
+ * Pick the 4-bit target selector that emitTEXS() writes at bit 0x35 of a
+ * TEXS instruction, based on texture target, the levelZero flag and whether
+ * the op is OP_TXL.
+ *
+ * Only OP_TEX and OP_TXL are accepted. Combinations that have no selector
+ * trip an assert; an unsupported target additionally returns 0x0.
+ * NOTE(review): the variant names in the comments below are matched, in
+ * order, against the "legal variants" list in isScalarTexGM107() -- confirm.
+ */
+static uint8_t
+getTEXSTarget(const TexInstruction *tex)
+{
+   assert(tex->op == OP_TEX || tex->op == OP_TXL);
+
+   const bool lz = tex->tex.levelZero;
+   const bool explicitLod = (tex->op == OP_TXL);
+
+   switch (tex->tex.target.getEnum()) {
+   case TEX_TARGET_1D:
+      // 1D only exists in its level-zero form.
+      assert(tex->tex.levelZero);
+      return 0x0;
+
+   case TEX_TARGET_2D:
+   case TEX_TARGET_RECT:
+      // 2D.LZ / 2D.LL / 2D
+      return lz ? 0x2 : (explicitLod ? 0x3 : 0x1);
+
+   case TEX_TARGET_2D_SHADOW:
+   case TEX_TARGET_RECT_SHADOW:
+      // 2D.LZ.DC / 2D.LL.DC / 2D.DC
+      return lz ? 0x6 : (explicitLod ? 0x5 : 0x4);
+
+   case TEX_TARGET_2D_ARRAY:
+      // A2D.LZ / A2D
+      return lz ? 0x8 : 0x7;
+
+   case TEX_TARGET_2D_ARRAY_SHADOW:
+      // A2D.LZ.DC is the only array-shadow form.
+      assert(tex->tex.levelZero);
+      return 0x9;
+
+   case TEX_TARGET_3D:
+      if (lz)
+         return 0xb;
+      // No explicit-LOD form for 3D.
+      assert(tex->op != OP_TXL);
+      return 0xa;
+
+   case TEX_TARGET_CUBE:
+      // CUBE has no level-zero form: CUBE.LL / CUBE
+      assert(!tex->tex.levelZero);
+      return explicitLod ? 0xd : 0xc;
+
+   default:
+      break;
+   }
+
+   assert(false);
+   return 0x0;
+}
+
+/**
+ * Pick the 4-bit target selector that emitTEXS() writes at bit 0x35 for an
+ * OP_TXF emitted in its scalar form, based on texture target, the levelZero
+ * flag and whether offsets are used.
+ *
+ * 2D_MS, 3D and 2D_ARRAY are only valid with levelZero set (asserted).
+ * Any other unsupported target trips an assert and returns 0x0.
+ */
+static uint8_t
+getTLDSTarget(const TexInstruction *tex)
+{
+   const bool lz = tex->tex.levelZero;
+
+   switch (tex->tex.target.getEnum()) {
+   case TEX_TARGET_1D:
+      return lz ? 0x0 : 0x1;
+
+   case TEX_TARGET_2D:
+   case TEX_TARGET_RECT:
+      // Offsets select a different pair of level-zero / explicit-LOD codes.
+      if (tex->tex.useOffsets)
+         return lz ? 0x4 : 0xc;
+      return lz ? 0x2 : 0x5;
+
+   case TEX_TARGET_2D_MS:
+      assert(tex->tex.levelZero);
+      return 0x6;
+
+   case TEX_TARGET_3D:
+      assert(tex->tex.levelZero);
+      return 0x7;
+
+   case TEX_TARGET_2D_ARRAY:
+      assert(tex->tex.levelZero);
+      return 0x8;
+
+   default:
+      break;
+   }
+
+   assert(false);
+   return 0x0;
+}
+
void
CodeEmitterGM107::emitTEX()
{
@@ -2698,6 +2796,49 @@ CodeEmitterGM107::emitTEX()
emitGPR (0x00, insn->def(0));
}
+/**
+ * Emit the scalar encoding of a texture instruction.
+ *
+ * OP_TEX/OP_TXL use opcode 0xd8000000, OP_TXF uses 0xda000000 and OP_TXG
+ * uses 0xdf000000. Whatever the op, the liveOnly flag, the tex.r index,
+ * up to two destination registers and up to two source registers follow.
+ * NOTE(review): emitGPR(pos) without a value presumably encodes the zero
+ * register for an absent operand -- confirm against emitGPR().
+ */
+void
+CodeEmitterGM107::emitTEXS()
+{
+   const TexInstruction *insn = this->insn->asTex();
+
+   if (insn->op == OP_TEX || insn->op == OP_TXL) {
+      emitInsn (0xd8000000);
+      emitField(0x35, 4, getTEXSTarget(insn));
+      emitField(0x32, 3, getTEXSMask(insn->tex.mask));
+   } else if (insn->op == OP_TXF) {
+      emitInsn (0xda000000);
+      emitField(0x35, 4, getTLDSTarget(insn));
+      emitField(0x32, 3, getTEXSMask(insn->tex.mask));
+   } else if (insn->op == OP_TXG) {
+      // The gather form has a single offset bit, so four offsets cannot
+      // be expressed here.
+      assert(insn->tex.useOffsets != 4);
+      emitInsn (0xdf000000);
+      emitField(0x34, 2, insn->tex.gatherComp);
+      emitField(0x33, 1, insn->tex.useOffsets == 1);
+      emitField(0x32, 1, insn->tex.target.isShadow());
+   } else {
+      unreachable("unknown op in emitTEXS()");
+   }
+
+   // Fields shared by all three encodings.
+   emitField(0x31, 1, insn->tex.liveOnly);
+   emitField(0x24, 13, insn->tex.r);
+
+   // Second destination register, if any.
+   if (!insn->defExists(1))
+      emitGPR(0x1c);
+   else
+      emitGPR(0x1c, insn->def(1));
+
+   // Second source register, if any.
+   if (!insn->srcExists(1))
+      emitGPR(0x14);
+   else
+      emitGPR(0x14, insn->getSrc(1));
+
+   emitGPR (0x08, insn->src(0));
+   emitGPR (0x00, insn->def(0));
+}
+
void
CodeEmitterGM107::emitTLD()
{
@@ -3409,15 +3550,26 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
emitPIXLD();
break;
case OP_TEX:
- case OP_TXB:
case OP_TXL:
+ if (insn->asTex()->tex.scalar)
+ emitTEXS();
+ else
+ emitTEX();
+ break;
+ case OP_TXB:
emitTEX();
break;
case OP_TXF:
- emitTLD();
+ if (insn->asTex()->tex.scalar)
+ emitTEXS();
+ else
+ emitTLD();
break;
case OP_TXG:
- emitTLD4();
+ if (insn->asTex()->tex.scalar)
+ emitTEXS();
+ else
+ emitTLD4();
break;
case OP_TXD:
emitTXD();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 0059ecaeb6c..87a39de99d6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -275,6 +275,9 @@ private:
void texConstraintNVE0(TexInstruction *);
void texConstraintGM107(TexInstruction *);
+ bool isScalarTexGM107(TexInstruction *);
+ void handleScalarTexGM107(TexInstruction *);
+
std::list<Instruction *> constrList;
const Target *targ;
@@ -2119,6 +2122,158 @@ RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
constrList.push_back(merge);
}
+/**
+ * Decide whether a texture instruction can use one of the scalar GM107
+ * encodings (TEXS, TLDS or TLD4S).
+ *
+ * \param tex  instruction to inspect; it is only read here
+ * \return true if the op / target / LOD mode / offset / mask combination
+ *         is one of the legal variants listed below
+ */
+bool
+RegAlloc::InsertConstraintsPass::isScalarTexGM107(TexInstruction *tex)
+{
+   // Indirect sampler or resource access is not handled by the scalar forms.
+   const bool indirect = tex->tex.sIndirectSrc >= 0 ||
+                         tex->tex.rIndirectSrc >= 0;
+   if (indirect)
+      return false;
+
+   // Write masks 0x5 and 0x6 are never treated as scalar.
+   if (tex->tex.mask == 5 || tex->tex.mask == 6)
+      return false;
+
+   const bool lz = tex->tex.levelZero;
+   const unsigned numOffsets = tex->tex.useOffsets;
+
+   // legal variants:
+   // TEXS.1D.LZ
+   // TEXS.2D
+   // TEXS.2D.LZ
+   // TEXS.2D.LL
+   // TEXS.2D.DC
+   // TEXS.2D.LL.DC
+   // TEXS.2D.LZ.DC
+   // TEXS.A2D
+   // TEXS.A2D.LZ
+   // TEXS.A2D.LZ.DC
+   // TEXS.3D
+   // TEXS.3D.LZ
+   // TEXS.CUBE
+   // TEXS.CUBE.LL
+
+   // TLDS.1D.LZ
+   // TLDS.1D.LL
+   // TLDS.2D.LZ
+   // TLDS.2D.LZ.AOFFI
+   // TLDS.2D.LZ.MZ
+   // TLDS.2D.LL
+   // TLDS.2D.LL.AOFFI
+   // TLDS.A2D.LZ
+   // TLDS.3D.LZ
+
+   // TLD4S: all 2D/RECT variants and only offset
+
+   switch (tex->op) {
+   case OP_TEX:
+      if (numOffsets != 0)
+         return false;
+
+      switch (tex->tex.target.getEnum()) {
+      case TEX_TARGET_2D:
+      case TEX_TARGET_2D_ARRAY:
+      case TEX_TARGET_2D_SHADOW:
+      case TEX_TARGET_3D:
+      case TEX_TARGET_RECT:
+      case TEX_TARGET_RECT_SHADOW:
+         return true;
+      case TEX_TARGET_1D:
+      case TEX_TARGET_2D_ARRAY_SHADOW:
+         return lz;
+      case TEX_TARGET_CUBE:
+         return !lz;
+      default:
+         return false;
+      }
+
+   case OP_TXL:
+      if (numOffsets != 0)
+         return false;
+
+      switch (tex->tex.target.getEnum()) {
+      case TEX_TARGET_2D:
+      case TEX_TARGET_2D_SHADOW:
+      case TEX_TARGET_RECT:
+      case TEX_TARGET_RECT_SHADOW:
+      case TEX_TARGET_CUBE:
+         return true;
+      default:
+         return false;
+      }
+
+   case OP_TXF:
+      switch (tex->tex.target.getEnum()) {
+      case TEX_TARGET_2D:
+      case TEX_TARGET_RECT:
+         return true;
+      case TEX_TARGET_1D:
+         return numOffsets == 0;
+      case TEX_TARGET_2D_ARRAY:
+      case TEX_TARGET_2D_MS:
+      case TEX_TARGET_3D:
+         return numOffsets == 0 && lz;
+      default:
+         return false;
+      }
+
+   case OP_TXG:
+      // At most one offset, and only the two- or four-component masks.
+      if (numOffsets > 1)
+         return false;
+      if (!(tex->tex.mask == 0x3 || tex->tex.mask == 0xf))
+         return false;
+
+      switch (tex->tex.target.getEnum()) {
+      case TEX_TARGET_2D:
+      case TEX_TARGET_2D_MS:
+      case TEX_TARGET_2D_SHADOW:
+      case TEX_TARGET_RECT:
+      case TEX_TARGET_RECT_SHADOW:
+         return true;
+      default:
+         return false;
+      }
+
+   default:
+      // Every other op has no scalar form.
+      break;
+   }
+
+   return false;
+}
+
+/**
+ * Mark a texture instruction as scalar and condense its operands so that at
+ * most two defs and two srcs remain.
+ *
+ * Higher-indexed pairs are condensed before lower ones.
+ * NOTE(review): presumably because condensing changes the indices of the
+ * operands that follow -- confirm against condenseDefs()/condenseSrcs().
+ */
+void
+RegAlloc::InsertConstraintsPass::handleScalarTexGM107(TexInstruction *tex)
+{
+   const int defCount = tex->defCount(0xff);
+   const int srcCount = tex->srcCount(0xff);
+
+   tex->tex.scalar = true;
+
+   // Defs: fold (2,3) first, then (0,1).
+   if (defCount > 3)
+      condenseDefs(tex, 2, 3);
+   if (defCount > 1)
+      condenseDefs(tex, 0, 1);
+
+   // Srcs: TXF on a 2D array condenses (1,2) and leaves src 0 on its own.
+   const bool isTxfA2D = tex->op == OP_TXF &&
+                         tex->tex.target == TEX_TARGET_2D_ARRAY;
+   if (!isTxfA2D) {
+      if (srcCount > 3)
+         condenseSrcs(tex, 2, 3);
+      // With only two sources there is nothing to merge.
+      if (srcCount > 2)
+         condenseSrcs(tex, 0, 1);
+   } else {
+      assert(srcCount >= 3);
+      condenseSrcs(tex, 1, 2);
+   }
+
+   // The scalar encodings have room for two defs and two srcs only.
+   assert(!tex->defExists(2) && !tex->srcExists(2));
+}
+
void
RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
{
@@ -2126,6 +2281,13 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
if (isTextureOp(tex->op))
textureMask(tex);
+
+ if (isScalarTexGM107(tex)) {
+ handleScalarTexGM107(tex);
+ return;
+ }
+
+ assert(!tex->tex.scalar);
condenseDefs(tex);
if (isSurfaceOp(tex->op)) {
--
2.17.1
More information about the mesa-dev
mailing list