[Nouveau] [PATCH] nv50/ir: rebase indirect temp arrays to 0, so that we use less lmem space

Ilia Mirkin imirkin at alum.mit.edu
Wed Jan 13 22:23:45 PST 2016


Reduces local memory usage in a lot of Metro 2033 Redux and a few KSP
shaders:

total local used in shared programs   : 54116 -> 30372 (-43.88%)

Probably modest advantage to execution, but it's an important
prerequisite to dropping some of the TGSI optimizations done by the
state tracker.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---

Seems like there ought to be a simpler way of doing this... oh well.

 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 64 +++++++++++++++++-----
 1 file changed, 50 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 0e1c332..2085978 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -841,6 +841,11 @@ public:
    std::set<Location> locals;
 
    std::set<int> indirectTempArrays;
+   struct TempBase {
+      int oldBase, newBase;
+   };
+   std::map<int, TempBase> indirectTempBases;
+   std::map<int, std::pair<int, int> > tempArrayInfo;
    std::vector<int> tempArrayId;
 
    int clipVertexOutput;
@@ -949,9 +954,19 @@ bool Source::scanSource()
    }
    tgsi_parse_free(&parse);
 
-   // TODO: Compute based on relevant array sizes
-   if (indirectTempArrays.size())
-      info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
+   if (indirectTempArrays.size()) {
+      int tempBase = 0;
+      for (std::set<int>::const_iterator it = indirectTempArrays.begin();
+           it != indirectTempArrays.end(); ++it) {
+         std::pair<int, int>& info = tempArrayInfo[*it];
+         TempBase base;
+         base.oldBase = info.first;
+         base.newBase = tempBase;
+         indirectTempBases.insert(std::make_pair(*it, base));
+         tempBase += info.second;
+      }
+      info->bin.tlsSpace += tempBase * 16;
+   }
 
    if (info->io.genUserClip > 0) {
       info->io.clipDistances = info->io.genUserClip;
@@ -1208,6 +1223,9 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
    case TGSI_FILE_TEMPORARY:
       for (i = first; i <= last; ++i)
          tempArrayId[i] = arrayId;
+      if (arrayId)
+         tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
+                                                   first, last - first + 1)));
       break;
    case TGSI_FILE_NULL:
    case TGSI_FILE_ADDRESS:
@@ -1374,6 +1392,7 @@ private:
    void storeDst(const tgsi::Instruction::DstRegister dst, int c,
                  Value *val, Value *ptr);
 
+   void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;
    Value *applySrcMod(Value *, int s, int c);
 
    Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
@@ -1679,11 +1698,23 @@ Converter::shiftAddress(Value *index)
    return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
 }
 
+void
+Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const
+{
+   std::map<int, tgsi::Source::TempBase>::const_iterator it =
+      code->indirectTempBases.find(arrayId);
+   if (it == code->indirectTempBases.end())
+      return;
+
+   idx2d = 1;
+   idx += it->second.newBase - it->second.oldBase;
+}
+
 Value *
 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
 {
    int idx2d = src.is2D() ? src.getIndex(1) : 0;
-   const int idx = src.getIndex(0);
+   int idx = src.getIndex(0);
    const int swz = src.getSwizzle(c);
    Instruction *ld;
 
@@ -1728,8 +1759,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
       int arrayid = src.getArrayId();
       if (!arrayid)
          arrayid = code->tempArrayId[idx];
-      idx2d = (code->indirectTempArrays.find(arrayid) !=
-               code->indirectTempArrays.end());
+      adjustTempIndex(arrayid, idx, idx2d);
    }
       /* fallthrough */
    default:
@@ -1743,7 +1773,7 @@ Converter::acquireDst(int d, int c)
 {
    const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
    const unsigned f = dst.getFile();
-   const int idx = dst.getIndex(0);
+   int idx = dst.getIndex(0);
    int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
 
    if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_IMAGE)
@@ -1754,9 +1784,12 @@ Converter::acquireDst(int d, int c)
        (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
       return getScratch();
 
-   if (f == TGSI_FILE_TEMPORARY)
-      idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) !=
-         code->indirectTempArrays.end();
+   if (f == TGSI_FILE_TEMPORARY) {
+      int arrayid = dst.getArrayId();
+      if (!arrayid)
+         arrayid = code->tempArrayId[idx];
+      adjustTempIndex(arrayid, idx, idx2d);
+   }
 
    return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
 }
@@ -1789,7 +1822,7 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
                     Value *val, Value *ptr)
 {
    const unsigned f = dst.getFile();
-   const int idx = dst.getIndex(0);
+   int idx = dst.getIndex(0);
    int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
 
    if (f == TGSI_FILE_SYSTEM_VALUE) {
@@ -1813,9 +1846,12 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
        f == TGSI_FILE_PREDICATE ||
        f == TGSI_FILE_ADDRESS ||
        f == TGSI_FILE_OUTPUT) {
-      if (f == TGSI_FILE_TEMPORARY)
-         idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) !=
-            code->indirectTempArrays.end();
+      if (f == TGSI_FILE_TEMPORARY) {
+         int arrayid = dst.getArrayId();
+         if (!arrayid)
+            arrayid = code->tempArrayId[idx];
+         adjustTempIndex(arrayid, idx, idx2d);
+      }
 
       getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
    } else {
-- 
2.4.10



More information about the Nouveau mailing list