[Nouveau] [PATCH] nv50/ir: rebase indirect temp arrays to 0, so that we use less lmem space
Ilia Mirkin
imirkin at alum.mit.edu
Wed Jan 13 22:23:45 PST 2016
Reduces local memory usage in a lot of Metro 2033 Redux and a few KSP
shaders:
total local used in shared programs : 54116 -> 30372 (-43.88%)
Probably a modest advantage to execution, but it's an important
prerequisite to dropping some of the TGSI optimizations done by the
state tracker.
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
Seems like there ought to be a simpler way of doing this... oh well.
.../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 64 +++++++++++++++++-----
1 file changed, 50 insertions(+), 14 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 0e1c332..2085978 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -841,6 +841,11 @@ public:
std::set<Location> locals;
std::set<int> indirectTempArrays;
+ struct TempBase {
+ int oldBase, newBase;
+ };
+ std::map<int, TempBase> indirectTempBases;
+ std::map<int, std::pair<int, int> > tempArrayInfo;
std::vector<int> tempArrayId;
int clipVertexOutput;
@@ -949,9 +954,19 @@ bool Source::scanSource()
}
tgsi_parse_free(&parse);
- // TODO: Compute based on relevant array sizes
- if (indirectTempArrays.size())
- info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
+ if (indirectTempArrays.size()) {
+ int tempBase = 0;
+ for (std::set<int>::const_iterator it = indirectTempArrays.begin();
+ it != indirectTempArrays.end(); ++it) {
+ std::pair<int, int>& info = tempArrayInfo[*it];
+ TempBase base;
+ base.oldBase = info.first;
+ base.newBase = tempBase;
+ indirectTempBases.insert(std::make_pair(*it, base));
+ tempBase += info.second;
+ }
+ info->bin.tlsSpace += tempBase * 16;
+ }
if (info->io.genUserClip > 0) {
info->io.clipDistances = info->io.genUserClip;
@@ -1208,6 +1223,9 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_FILE_TEMPORARY:
for (i = first; i <= last; ++i)
tempArrayId[i] = arrayId;
+ if (arrayId)
+ tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
+ first, last - first + 1)));
break;
case TGSI_FILE_NULL:
case TGSI_FILE_ADDRESS:
@@ -1374,6 +1392,7 @@ private:
void storeDst(const tgsi::Instruction::DstRegister dst, int c,
Value *val, Value *ptr);
+ void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;
Value *applySrcMod(Value *, int s, int c);
Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
@@ -1679,11 +1698,23 @@ Converter::shiftAddress(Value *index)
return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
}
+void
+Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const
+{
+ std::map<int, tgsi::Source::TempBase>::const_iterator it =
+ code->indirectTempBases.find(arrayId);
+ if (it == code->indirectTempBases.end())
+ return;
+
+ idx2d = 1;
+ idx += it->second.newBase - it->second.oldBase;
+}
+
Value *
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
int idx2d = src.is2D() ? src.getIndex(1) : 0;
- const int idx = src.getIndex(0);
+ int idx = src.getIndex(0);
const int swz = src.getSwizzle(c);
Instruction *ld;
@@ -1728,8 +1759,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
int arrayid = src.getArrayId();
if (!arrayid)
arrayid = code->tempArrayId[idx];
- idx2d = (code->indirectTempArrays.find(arrayid) !=
- code->indirectTempArrays.end());
+ adjustTempIndex(arrayid, idx, idx2d);
}
/* fallthrough */
default:
@@ -1743,7 +1773,7 @@ Converter::acquireDst(int d, int c)
{
const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
const unsigned f = dst.getFile();
- const int idx = dst.getIndex(0);
+ int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_IMAGE)
@@ -1754,9 +1784,12 @@ Converter::acquireDst(int d, int c)
(f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
return getScratch();
- if (f == TGSI_FILE_TEMPORARY)
- idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) !=
- code->indirectTempArrays.end();
+ if (f == TGSI_FILE_TEMPORARY) {
+ int arrayid = dst.getArrayId();
+ if (!arrayid)
+ arrayid = code->tempArrayId[idx];
+ adjustTempIndex(arrayid, idx, idx2d);
+ }
return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
}
@@ -1789,7 +1822,7 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
Value *val, Value *ptr)
{
const unsigned f = dst.getFile();
- const int idx = dst.getIndex(0);
+ int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
if (f == TGSI_FILE_SYSTEM_VALUE) {
@@ -1813,9 +1846,12 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
f == TGSI_FILE_PREDICATE ||
f == TGSI_FILE_ADDRESS ||
f == TGSI_FILE_OUTPUT) {
- if (f == TGSI_FILE_TEMPORARY)
- idx2d = code->indirectTempArrays.find(code->tempArrayId[idx]) !=
- code->indirectTempArrays.end();
+ if (f == TGSI_FILE_TEMPORARY) {
+ int arrayid = dst.getArrayId();
+ if (!arrayid)
+ arrayid = code->tempArrayId[idx];
+ adjustTempIndex(arrayid, idx, idx2d);
+ }
getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
} else {
--
2.4.10
More information about the Nouveau
mailing list