[Nouveau] [PATCH] nv50/ir: only use FILE_LOCAL_MEMORY for temp arrays that use indirection

Ilia Mirkin imirkin at alum.mit.edu
Wed Jan 13 18:27:49 PST 2016


Previously we were treating any indirect temp array usage to mean that
everything should end up in lmem. The MemoryOpt pass would clean a lot
of that up later, but in the meanwhile we would lose a lot of
opportunity for optimization.

This helps a lot of Metro 2033 Redux and a handful of KSP shaders:

total instructions in shared programs : 6288373 -> 6261517 (-0.43%)
total gprs used in shared programs    : 944051 -> 945131 (0.11%)
total local used in shared programs   : 54116 -> 54116 (0.00%)
total bytes used in shared programs   : 50306984 -> 50092136 (-0.43%)

A typical case is for register usage to double and for instructions to
halve. A future commit can also optimize local memory usage size to be
reduced with better packing.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 65 +++++++++++++++++-----
 1 file changed, 50 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 7e3b093..507749d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -96,6 +96,13 @@ public:
          return tgsi_util_get_src_register_swizzle(&reg, chan);
       }
 
+      int getArrayId() const
+      {
+         if (isIndirect(0))
+            return fsr->Indirect.ArrayID;
+         return 0;
+      }
+
       nv50_ir::Modifier getMod(int chan) const;
 
       SrcRegister getIndirect(int dim) const
@@ -155,6 +162,13 @@ public:
          return SrcRegister(fdr->Indirect);
       }
 
+      int getArrayId() const
+      {
+         if (isIndirect(0))
+            return fdr->Indirect.ArrayID;
+         return 0;
+      }
+
    private:
       const struct tgsi_dst_register reg;
       const struct tgsi_full_dst_register *fdr;
@@ -826,7 +840,8 @@ public:
    // these registers are per-subroutine, cannot be used for parameter passing
    std::set<Location> locals;
 
-   bool mainTempsInLMem;
+   std::set<int> indirectTempArrays;
+   std::vector<int> tempArrayId;
 
    int clipVertexOutput;
 
@@ -859,8 +874,6 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
 
    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
       tgsi_dump(tokens, 0);
-
-   mainTempsInLMem = false;
 }
 
 Source::~Source()
@@ -890,6 +903,7 @@ bool Source::scanSource()
 
    textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
    resources.resize(scan.file_max[TGSI_FILE_IMAGE] + 1);
+   tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
 
    info->immd.bufSize = 0;
 
@@ -935,7 +949,8 @@ bool Source::scanSource()
    }
    tgsi_parse_free(&parse);
 
-   if (mainTempsInLMem)
+   // TODO: Compute based on relevant array sizes
+   if (indirectTempArrays.size())
       info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
 
    if (info->io.genUserClip > 0) {
@@ -1046,6 +1061,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
    unsigned sn = TGSI_SEMANTIC_GENERIC;
    unsigned si = 0;
    const unsigned first = decl->Range.First, last = decl->Range.Last;
+   const int arrayId = decl->Array.ArrayID;
 
    if (decl->Declaration.Semantic) {
       sn = decl->Semantic.Name;
@@ -1189,8 +1205,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
       for (i = first; i <= last; ++i)
          textureViews[i].target = decl->SamplerView.Resource;
       break;
-   case TGSI_FILE_NULL:
    case TGSI_FILE_TEMPORARY:
+      for (i = first; i <= last; ++i)
+         tempArrayId[i] = arrayId;
+      break;
+   case TGSI_FILE_NULL:
    case TGSI_FILE_ADDRESS:
    case TGSI_FILE_CONSTANT:
    case TGSI_FILE_IMMEDIATE:
@@ -1241,7 +1260,7 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
       } else
       if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
          if (insn.getDst(0).isIndirect(0))
-            mainTempsInLMem = true;
+            indirectTempArrays.insert(insn.getDst(0).getArrayId());
       } else
       if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) {
          info->io.globalAccess |= 0x2;
@@ -1252,7 +1271,7 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
       Instruction::SrcRegister src = insn.getSrc(s);
       if (src.getFile() == TGSI_FILE_TEMPORARY) {
          if (src.isIndirect(0))
-            mainTempsInLMem = true;
+            indirectTempArrays.insert(src.getArrayId());
       } else
       if (src.getFile() == TGSI_FILE_BUFFER) {
          info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
@@ -1434,6 +1453,7 @@ private:
    DataType srcTy;
 
    DataArray tData; // TGSI_FILE_TEMPORARY
+   DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays
    DataArray aData; // TGSI_FILE_ADDRESS
    DataArray pData; // TGSI_FILE_PREDICATE
    DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
@@ -1637,7 +1657,7 @@ Converter::getArrayForFile(unsigned file, int idx)
 {
    switch (file) {
    case TGSI_FILE_TEMPORARY:
-      return &tData;
+      return idx == 0 ? &tData : &lData;
    case TGSI_FILE_PREDICATE:
       return &pData;
    case TGSI_FILE_ADDRESS:
@@ -1662,7 +1682,7 @@ Converter::shiftAddress(Value *index)
 Value *
 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
 {
-   const int idx2d = src.is2D() ? src.getIndex(1) : 0;
+   int idx2d = src.is2D() ? src.getIndex(1) : 0;
    const int idx = src.getIndex(0);
    const int swz = src.getSwizzle(c);
    Instruction *ld;
@@ -1704,6 +1724,14 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
       ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
       ld->perPatch = info->sv[idx].patch;
       return ld->getDef(0);
+   case TGSI_FILE_TEMPORARY: {
+      int arrayid = src.getArrayId();
+      if (!arrayid)
+         arrayid = code->tempArrayId[idx];
+      idx2d = (code->indirectTempArrays.find(arrayid) !=
+               code->indirectTempArrays.end());
+   }
+      /* fallthrough */
    default:
       return getArrayForFile(src.getFile(), idx2d)->load(
          sub.cur->values, idx, swz, shiftAddress(ptr));
@@ -1716,7 +1744,7 @@ Converter::acquireDst(int d, int c)
    const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
    const unsigned f = dst.getFile();
    const int idx = dst.getIndex(0);
-   const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
+   int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
 
    if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_IMAGE)
       return NULL;
@@ -1726,6 +1754,10 @@ Converter::acquireDst(int d, int c)
        (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
       return getScratch();
 
+   if (f == TGSI_FILE_TEMPORARY)
+      idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) !=
+         code->indirectTempArrays.end();
+
    return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
 }
 
@@ -1758,7 +1790,7 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
 {
    const unsigned f = dst.getFile();
    const int idx = dst.getIndex(0);
-   const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
+   int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
 
    if (f == TGSI_FILE_SYSTEM_VALUE) {
       assert(!ptr);
@@ -1781,6 +1813,10 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
        f == TGSI_FILE_PREDICATE ||
        f == TGSI_FILE_ADDRESS ||
        f == TGSI_FILE_OUTPUT) {
+      if (f == TGSI_FILE_TEMPORARY)
+         idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) !=
+            code->indirectTempArrays.end();
+
       getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
    } else {
       assert(!"invalid dst file");
@@ -3417,18 +3453,17 @@ Converter::exportOutputs()
 Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
      code(code),
      tgsi(NULL),
-     tData(this), aData(this), pData(this), oData(this)
+     tData(this), lData(this), aData(this), pData(this), oData(this)
 {
    info = code->info;
 
-   const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR;
-
    const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
    const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE);
    const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
    const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
 
-   tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0);
+   tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0);
+   lData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0);
    pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
    aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
    oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
-- 
2.4.10



More information about the Nouveau mailing list