Mesa (master): nvc0: add bindless image support for kepler

Ilia Mirkin imirkin at kemper.freedesktop.org
Sun Jan 7 16:43:44 UTC 2018


Module: Mesa
Branch: master
Commit: 23a6e8d8ff5effa7b88bd77c3bf396879a2ece0c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=23a6e8d8ff5effa7b88bd77c3bf396879a2ece0c

Author: Ilia Mirkin <imirkin at alum.mit.edu>
Date:   Sat Jun 24 00:44:35 2017 -0400

nvc0: add bindless image support for kepler

A part of the driver constbuf area is allocated for bindless images. Any
update requires uploading to all driver constbufs. This also extends the
driver constbuf to 64KB, up from 2KB.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>

---

 .../drivers/nouveau/codegen/nv50_ir_driver.h       |   1 +
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 148 ++++++++++++++++-----
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      |  57 ++++----
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h        |   2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  12 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |   1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  10 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.h     |   6 +
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c        | 100 ++++++++++++--
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c        |   5 +
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c    |   5 +
 11 files changed, 272 insertions(+), 75 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 520d1d6743..3d0782f86b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -180,6 +180,7 @@ struct nv50_ir_prog_info
       uint16_t texBindBase;      /* base address for tex handles (nve4) */
       uint16_t fbtexBindBase;    /* base address for fbtex handle (nve4) */
       uint16_t suInfoBase;       /* base address for surface info (nve4) */
+      uint16_t bindlessBase;     /* base address for bindless image info (nve4) */
       uint16_t bufInfoBase;      /* base address for buffer info */
       uint16_t sampleInfoBase;   /* base address for sample positions */
       uint8_t msInfoCBSlot;      /* cX[] used for multisample info */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 6c615806b8..09b5228127 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -20,6 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include "tgsi/tgsi_build.h"
 #include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_scan.h"
 #include "tgsi/tgsi_util.h"
@@ -163,6 +164,12 @@ public:
          return SrcRegister(fdr->Indirect);
       }
 
+      struct tgsi_full_src_register asSrc()
+      {
+         assert(fdr);
+         return tgsi_full_src_register_from_dst(fdr);
+      }
+
       int getArrayId() const
       {
          if (isIndirect(0))
@@ -1503,13 +1510,6 @@ void Source::scanInstructionSrc(const Instruction& insn,
       if (src.isIndirect(0))
          indirectTempArrays.insert(src.getArrayId());
    } else
-   if (src.getFile() == TGSI_FILE_BUFFER ||
-       src.getFile() == TGSI_FILE_IMAGE ||
-       (src.getFile() == TGSI_FILE_MEMORY &&
-        memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
-      info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
-         0x1 : 0x2;
-   } else
    if (src.getFile() == TGSI_FILE_OUTPUT) {
       if (src.isIndirect(0)) {
          // We don't know which one is accessed, just mark everything for
@@ -1580,6 +1580,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
          if (isEdgeFlagPassthrough(insn))
             info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
       } else
+      if (dst.getFile() != TGSI_FILE_MEMORY &&
+          insn.getOpcode() == TGSI_OPCODE_STORE) {
+         info->io.globalAccess |= 0x2;
+      } else
       if (dst.getFile() == TGSI_FILE_TEMPORARY) {
          if (dst.isIndirect(0))
             indirectTempArrays.insert(dst.getArrayId());
@@ -1592,6 +1596,29 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
       }
    }
 
+   if (insn.srcCount() && (
+             insn.getSrc(0).getFile() != TGSI_FILE_MEMORY ||
+             memoryFiles[insn.getSrc(0).getIndex(0)].mem_type ==
+             TGSI_MEMORY_TYPE_GLOBAL)) {
+      switch (insn.getOpcode()) {
+      case TGSI_OPCODE_ATOMUADD:
+      case TGSI_OPCODE_ATOMXCHG:
+      case TGSI_OPCODE_ATOMCAS:
+      case TGSI_OPCODE_ATOMAND:
+      case TGSI_OPCODE_ATOMOR:
+      case TGSI_OPCODE_ATOMXOR:
+      case TGSI_OPCODE_ATOMUMIN:
+      case TGSI_OPCODE_ATOMIMIN:
+      case TGSI_OPCODE_ATOMUMAX:
+      case TGSI_OPCODE_ATOMIMAX:
+      case TGSI_OPCODE_LOAD:
+         info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+            0x1 : 0x2;
+         break;
+      }
+   }
+
+
    for (unsigned s = 0; s < insn.srcCount(); ++s)
       scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));
 
@@ -1648,6 +1675,7 @@ private:
    Value *getOutputBase(int s);
    DataArray *getArrayForFile(unsigned file, int idx);
    Value *fetchSrc(int s, int c);
+   Value *fetchDst(int d, int c);
    Value *acquireDst(int d, int c);
    void storeDst(int d, int c, Value *);
 
@@ -1956,6 +1984,47 @@ Converter::fetchSrc(int s, int c)
    return applySrcMod(res, s, c);
 }
 
+Value *
+Converter::fetchDst(int d, int c)
+{
+   Value *res;
+   Value *ptr = NULL, *dimRel = NULL;
+
+   tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
+
+   if (dst.isIndirect(0))
+      ptr = fetchSrc(dst.getIndirect(0), 0, NULL);
+
+   if (dst.is2D()) {
+      switch (dst.getFile()) {
+      case TGSI_FILE_OUTPUT:
+         assert(0); // TODO
+         dimRel = NULL;
+         break;
+      case TGSI_FILE_INPUT:
+         assert(0); // TODO
+         dimRel = NULL;
+         break;
+      case TGSI_FILE_CONSTANT:
+         // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
+         if (dst.isIndirect(1))
+            dimRel = fetchSrc(dst.getIndirect(1), 0, 0);
+         break;
+      default:
+         break;
+      }
+   }
+
+   struct tgsi_full_src_register fsr = dst.asSrc();
+   tgsi::Instruction::SrcRegister src(&fsr);
+   res = fetchSrc(src, c, ptr);
+
+   if (dimRel)
+      res->getInsn()->setIndirect(0, 1, dimRel);
+
+   return res;
+}
+
 Converter::DataArray *
 Converter::getArrayForFile(unsigned file, int idx)
 {
@@ -2645,7 +2714,7 @@ Converter::handleLOAD(Value *dst0[4])
             ld->setIndirect(0, 1, ind);
       }
       break;
-   case TGSI_FILE_IMAGE: {
+   default: {
       getImageCoords(off, 1);
       def.resize(4);
 
@@ -2656,22 +2725,28 @@ Converter::handleLOAD(Value *dst0[4])
             def[c] = dst0[c];
       }
 
+      bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
+      if (bindless)
+         ind = fetchSrc(0, 0);
+
       TexInstruction *ld =
-         mkTex(OP_SULDP, tgsi.getImageTarget(), r, 0, def, off);
+         mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off);
       ld->tex.mask = tgsi.getDst(0).getMask();
       ld->tex.format = tgsi.getImageFormat();
       ld->cache = tgsi.getCacheMode();
+      ld->tex.bindless = bindless;
+      if (!bindless)
+         ld->tex.r = r;
       if (ind)
          ld->setIndirectR(ind);
 
       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
          if (dst0[c] != def[c])
             mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
-      }
       break;
-   default:
-      assert(!"Unsupported srcFile for LOAD");
    }
+   }
+
 
 /* Keep this around for now as reference when adding img support
    getResourceCoords(off, r, 1);
@@ -2780,24 +2855,30 @@ Converter::handleSTORE()
             st->setIndirect(0, 1, ind);
       }
       break;
-   case TGSI_FILE_IMAGE: {
+   default: {
       getImageCoords(off, 0);
       src = off;
 
       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
          src.push_back(fetchSrc(1, c));
 
+      bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE;
+      if (bindless)
+         ind = fetchDst(0, 0);
+
       TexInstruction *st =
-         mkTex(OP_SUSTP, tgsi.getImageTarget(), r, 0, dummy, src);
+         mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src);
       st->tex.mask = tgsi.getDst(0).getMask();
       st->tex.format = tgsi.getImageFormat();
       st->cache = tgsi.getCacheMode();
+      st->tex.bindless = bindless;
+      if (!bindless)
+         st->tex.r = r;
       if (ind)
          st->setIndirectR(ind);
-      }
+
       break;
-   default:
-      assert(!"Unsupported dstFile for STORE");
+   }
    }
 
 /* Keep this around for now as reference when adding img support
@@ -2896,7 +2977,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
          if (dst0[c])
             dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
       break;
-   case TGSI_FILE_IMAGE: {
+   default: {
       getImageCoords(srcv, 1);
       defv.push_back(dst);
       srcv.push_back(fetchSrc(2, 0));
@@ -2904,22 +2985,27 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
       if (subOp == NV50_IR_SUBOP_ATOM_CAS)
          srcv.push_back(fetchSrc(3, 0));
 
+      bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
+      if (bindless)
+         ind = fetchSrc(0, 0);
+
       TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(),
-                                  r, 0, defv, srcv);
+                                  0, 0, defv, srcv);
       tex->subOp = subOp;
       tex->tex.mask = 1;
       tex->tex.format = tgsi.getImageFormat();
       tex->setType(ty);
+      tex->tex.bindless = bindless;
+      if (!bindless)
+         tex->tex.r = r;
       if (ind)
          tex->setIndirectR(ind);
 
       for (int c = 0; c < 4; ++c)
          if (dst0[c])
             dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
-      }
       break;
-   default:
-      assert(!"Unsupported srcFile for ATOM");
+   }
    }
 
 /* Keep this around for now as reference when adding img support
@@ -3122,7 +3208,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
 
    unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
 
-   if (tgsi.dstCount()) {
+   if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
       for (c = 0; c < 4; ++c) {
          rDst0[c] = acquireDst(0, c);
          dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
@@ -3747,8 +3833,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
          if (ind)
             geni->setIndirect(0, 1, ind);
       } else {
-         assert(tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE);
-
          TexInstruction *texi = new_TexInstruction(func, OP_SUQ);
          for (int c = 0, d = 0; c < 4; ++c) {
             if (dst0[c]) {
@@ -3756,12 +3840,16 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
                texi->tex.mask |= 1 << c;
             }
          }
-         texi->tex.r = tgsi.getSrc(0).getIndex(0);
+         if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) {
+            texi->tex.r = tgsi.getSrc(0).getIndex(0);
+            if (tgsi.getSrc(0).isIndirect(0))
+               texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+         } else {
+            texi->tex.bindless = true;
+            texi->setIndirectR(fetchSrc(0, 0));
+         }
          texi->tex.target = tgsi.getImageTarget();
 
-         if (tgsi.getSrc(0).isIndirect(0))
-            texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
-
          bb->insertTail(texi);
       }
       break;
@@ -4117,7 +4205,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       break;
    }
 
-   if (tgsi.dstCount()) {
+   if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
       for (c = 0; c < 4; ++c) {
          if (!dst0[c])
             continue;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 4e65d449eb..e07f57e782 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1742,19 +1742,23 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
 #define NVC0_SU_INFO_MS(i)   (0x38 + (i) * 4)
 
 inline Value *
-NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off)
+NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless)
 {
    uint32_t base = slot * NVC0_SU_INFO__STRIDE;
 
    if (ptr) {
       ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot));
-      ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7));
+      if (bindless)
+         ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(511));
+      else
+         ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7));
       ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(6));
       base = 0;
    }
    off += base;
 
-   return loadResInfo32(ptr, off, prog->driver->io.suInfoBase);
+   return loadResInfo32(ptr, off, bindless ? prog->driver->io.bindlessBase :
+                        prog->driver->io.suInfoBase);
 }
 
 static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
@@ -1800,7 +1804,7 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq)
       } else {
          offset = NVC0_SU_INFO_SIZE(c);
       }
-      bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset));
+      bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset, suq->tex.bindless));
       if (c == 2 && suq->tex.target.isCube())
          bld.mkOp2(OP_DIV, TYPE_U32, suq->getDef(d - 1), suq->getDef(d - 1),
                    bld.loadImm(NULL, 6));
@@ -1808,8 +1812,8 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq)
 
    if (mask & 1) {
       if (suq->tex.target.isMS()) {
-         Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0));
-         Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1));
+         Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless);
+         Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless);
          Value *ms = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), ms_x, ms_y);
          bld.mkOp2(OP_SHL, TYPE_U32, suq->getDef(d++), bld.loadImm(NULL, 1), ms);
       } else {
@@ -1842,8 +1846,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
    Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
    Value *ind = tex->getIndirectR();
 
-   Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0));
-   Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1));
+   Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
+   Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
 
    bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
    bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
@@ -1903,9 +1907,9 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
 
       src[c] = bld.getScratch();
       if (c == 0 && raw)
-         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X);
+         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X, su->tex.bindless);
       else
-         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc));
+         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc), su->tex.bindless);
       bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero)
          ->subOp = getSuClampSubOp(su, dimc);
    }
@@ -1927,16 +1931,16 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
          bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff));
    } else
    if (dim == 3) {
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
       bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1])
          ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
 
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless);
       bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0])
          ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l
    } else {
       assert(dim == 2);
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless);
       bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0])
          ->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ?
          NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
@@ -1947,7 +1951,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
       if (raw) {
          bf = src[0];
       } else {
-         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT);
+         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless);
          bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero)
             ->subOp = NV50_IR_SUBOP_V1(7,6,8|2);
       }
@@ -1964,7 +1968,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
       case 2:
          z = off;
          if (!su->tex.target.isArray() && !su->tex.target.isCube()) {
-            z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C);
+            z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
             subOp = NV50_IR_SUBOP_SUBFM_3D;
          }
          break;
@@ -1979,7 +1983,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
    }
 
    // part 2
-   v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR);
+   v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless);
 
    if (su->tex.target == TEX_TARGET_BUFFER) {
       eau = v;
@@ -1988,7 +1992,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
    }
    // add array layer offset
    if (su->tex.target.isArray() || su->tex.target.isCube()) {
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless);
       if (dim == 1)
          bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau)
             ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32
@@ -2028,7 +2032,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
 
    // let's just set it 0 for raw access and hope it works
    v = raw ?
-      bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT);
+      bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless);
 
    // get rid of old coordinate sources, make space for fmt info and predicate
    su->moveSources(arg, 3 - arg);
@@ -2036,12 +2040,13 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
    su->setSrc(0, addr);
    su->setSrc(1, v);
    su->setSrc(2, pred);
+   su->setIndirectR(NULL);
 
    // prevent read fault when the image is not actually bound
    CmpInstruction *pred1 =
       bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
                 TYPE_U32, bld.mkImm(0),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR));
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
 
    if (su->op != OP_SUSTP && su->tex.format) {
       const TexInstruction::ImgFormatDesc *format = su->tex.format;
@@ -2052,7 +2057,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
       assert(format->components != 0);
       bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred1->getDef(0),
                 TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
                 pred1->getDef(0));
    }
    su->setPredicate(CC_NOT_P, pred1->getDef(0));
@@ -2247,13 +2252,13 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
 
    // calculate pixel offset
    if (su->op == OP_SULDP || su->op == OP_SUREDP) {
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless);
       su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v));
    }
 
    // add array layer offset
    if (su->tex.target.isArray() || su->tex.target.isCube()) {
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless);
       assert(dim > 1);
       su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v));
    }
@@ -2262,7 +2267,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
    CmpInstruction *pred =
       bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
                 TYPE_U32, bld.mkImm(0),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR));
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
    if (su->op != OP_SUSTP && su->tex.format) {
       const TexInstruction::ImgFormatDesc *format = su->tex.format;
       int blockwidth = format->bits[0] + format->bits[1] +
@@ -2272,7 +2277,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
       // make sure that the format doesn't mismatch when it's not FMT_NONE
       bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
                 TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
                 pred->getDef(0));
    }
    su->setPredicate(CC_NOT_P, pred->getDef(0));
@@ -2361,7 +2366,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
    CmpInstruction *pred =
       bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
                 TYPE_U32, bld.mkImm(0),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR));
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
    if (su->op != OP_SUSTP && su->tex.format) {
       const TexInstruction::ImgFormatDesc *format = su->tex.format;
       int blockwidth = format->bits[0] + format->bits[1] +
@@ -2371,7 +2376,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
       // make sure that the format doesn't mismatch when it's not FMT_NONE
       bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
                 TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
                 pred->getDef(0));
    }
    su->setPredicate(CC_NOT_P, pred->getDef(0));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 7fae7e24b9..37d5297665 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -130,7 +130,7 @@ private:
    Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
    Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
    Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
-   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off);
+   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
    Value *loadBufInfo64(Value *ptr, uint32_t off);
    Value *loadBufLength32(Value *ptr, uint32_t off);
    Value *loadUboInfo64(Value *ptr, uint32_t off);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index c5b625ecb4..0729c88dff 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -108,9 +108,9 @@
 /* 6 user uniform buffers, at 64K each */
 #define NVC0_CB_USR_INFO(s)         (s << 16)
 #define NVC0_CB_USR_SIZE            (6 << 16)
-/* 6 driver constbuts, at 2K each */
-#define NVC0_CB_AUX_INFO(s)         NVC0_CB_USR_SIZE + (s << 11)
-#define NVC0_CB_AUX_SIZE            (1 << 11)
+/* 6 driver constbuts, at 64K each */
+#define NVC0_CB_AUX_INFO(s)         NVC0_CB_USR_SIZE + (s << 16)
+#define NVC0_CB_AUX_SIZE            (1 << 16)
 /* XXX: Figure out what this UNK data is. */
 #define NVC0_CB_AUX_UNK_INFO        0x000
 #define NVC0_CB_AUX_UNK_SIZE        (8 * 4)
@@ -146,6 +146,9 @@
 /* 1 64-bits address and 1 32-bits sequence */
 #define NVC0_CB_AUX_MP_INFO         0x620
 #define NVC0_CB_AUX_MP_SIZE         3 * 4
+/* 512 64-byte blocks for bindless image handles */
+#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4
+#define NVC0_CB_AUX_BINDLESS_SIZE   (NVE4_IMG_MAX_HANDLES * 16 * 4)
 /* 4 32-bits floats for the vertex runout, put at the end */
 #define NVC0_CB_AUX_RUNOUT_INFO     NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
 
@@ -355,7 +358,8 @@ void nvc0_validate_textures(struct nvc0_context *);
 void nvc0_validate_samplers(struct nvc0_context *);
 void nve4_set_tex_handles(struct nvc0_context *);
 void nvc0_validate_surfaces(struct nvc0_context *);
-void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_image_view *,
+void nve4_set_surface_info(struct nouveau_pushbuf *,
+                           const struct pipe_image_view *,
                            struct nvc0_context *);
 void nvc0_mark_image_range_valid(const struct pipe_image_view *);
 bool nvc0_update_tic(struct nvc0_context *, struct nv50_tic_entry *,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 46a15d76df..e6157f550d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -592,6 +592,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
    if (info->target >= NVISA_GK104_CHIPSET) {
       info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
       info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
+      info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0);
    }
 
    if (prog->type == PIPE_SHADER_COMPUTE) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index cac4bb8927..e51d516353 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -1057,7 +1057,8 @@ nvc0_screen_create(struct nouveau_device *dev)
    if (ret)
       FAIL_SCREEN_INIT("Error allocating TEXT area: %d\n", ret);
 
-   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 7 << 16, NULL,
+   /* 6 user uniform areas, 6 driver areas, and 1 for the runout */
+   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 13 << 16, NULL,
                         &screen->uniform_bo);
    if (ret)
       FAIL_SCREEN_INIT("Error allocating uniform BO: %d\n", ret);
@@ -1279,8 +1280,11 @@ nvc0_screen_create(struct nouveau_device *dev)
 
    PUSH_KICK (push);
 
-   screen->tic.entries = CALLOC(4096, sizeof(void *));
-   screen->tsc.entries = screen->tic.entries + 2048;
+   screen->tic.entries = CALLOC(
+         NVC0_TIC_MAX_ENTRIES + NVC0_TSC_MAX_ENTRIES + NVE4_IMG_MAX_HANDLES,
+         sizeof(void *));
+   screen->tsc.entries = screen->tic.entries + NVC0_TIC_MAX_ENTRIES;
+   screen->img.entries = (void *)(screen->tsc.entries + NVC0_TSC_MAX_ENTRIES);
 
    if (!nvc0_blitter_create(screen))
       goto fail;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 30635c757f..efd62a8a41 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -13,6 +13,7 @@
 
 #define NVC0_TIC_MAX_ENTRIES 2048
 #define NVC0_TSC_MAX_ENTRIES 2048
+#define NVE4_IMG_MAX_HANDLES 512
 
 /* doesn't count driver-reserved slot */
 #define NVC0_MAX_PIPE_CONSTBUFS         15
@@ -97,6 +98,11 @@ struct nvc0_screen {
    } tsc;
 
    struct {
+      struct pipe_image_view **entries;
+      int next;
+   } img;
+
+   struct {
       struct nouveau_bo *bo;
       uint32_t *map;
    } fence;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 88cec7fcef..9e391fe1ac 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -876,20 +876,13 @@ nve4_make_texture_handle_resident(struct pipe_context *pipe,
    }
 }
 
-void
-nvc0_init_bindless_functions(struct pipe_context *pipe) {
-   pipe->create_texture_handle = nve4_create_texture_handle;
-   pipe->delete_texture_handle = nve4_delete_texture_handle;
-   pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;
-}
-
 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
 static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
 
 static void
-nvc0_get_surface_dims(struct pipe_image_view *view, int *width, int *height,
-                      int *depth)
+nvc0_get_surface_dims(const struct pipe_image_view *view,
+                      int *width, int *height, int *depth)
 {
    struct nv04_resource *res = nv04_resource(view->resource);
    int level;
@@ -937,7 +930,7 @@ nvc0_mark_image_range_valid(const struct pipe_image_view *view)
 
 void
 nve4_set_surface_info(struct nouveau_pushbuf *push,
-                      struct pipe_image_view *view,
+                      const struct pipe_image_view *view,
                       struct nvc0_context *nvc0)
 {
    struct nvc0_screen *screen = nvc0->screen;
@@ -1070,7 +1063,7 @@ nve4_set_surface_info(struct nouveau_pushbuf *push,
 
 static inline void
 nvc0_set_surface_info(struct nouveau_pushbuf *push,
-                      struct pipe_image_view *view, uint64_t address,
+                      const struct pipe_image_view *view, uint64_t address,
                       int width, int height, int depth)
 {
    struct nv04_resource *res;
@@ -1319,6 +1312,91 @@ nvc0_validate_surfaces(struct nvc0_context *nvc0)
    }
 }
 
+static uint64_t
+nve4_create_image_handle(struct pipe_context *pipe,
+                         const struct pipe_image_view *view)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   int i = screen->img.next, s;
+
+   while (screen->img.entries[i]) {
+      i = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
+      if (i == screen->img.next)
+         return 0;
+   }
+
+   screen->img.next = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
+   screen->img.entries[i] = calloc(1, sizeof(struct pipe_image_view));
+   *screen->img.entries[i] = *view;
+
+   for (s = 0; s < 6; s++) {
+      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
+      PUSH_DATA (push, NVC0_CB_AUX_BINDLESS_INFO(i));
+      nve4_set_surface_info(push, view, nvc0);
+   }
+
+   return 0x100000000ULL | i;
+}
+
+static void
+nve4_delete_image_handle(struct pipe_context *pipe, uint64_t handle)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nvc0->screen;
+   int i = handle & (NVE4_IMG_MAX_HANDLES - 1);
+
+   free(screen->img.entries[i]);
+   screen->img.entries[i] = NULL;
+}
+
+static void
+nve4_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,
+                                unsigned access, bool resident)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nvc0->screen;
+
+   if (resident) {
+      struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
+      struct pipe_image_view *view =
+         screen->img.entries[handle & (NVE4_IMG_MAX_HANDLES - 1)];
+      assert(view);
+
+      if (view->resource->target == PIPE_BUFFER &&
+          access & PIPE_IMAGE_ACCESS_WRITE)
+         nvc0_mark_image_range_valid(view);
+      res->handle = handle;
+      res->buf = nv04_resource(view->resource);
+      res->flags = (access & 3) << 8;
+      list_add(&res->list, &nvc0->img_head);
+   } else {
+      list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {
+         if (pos->handle == handle) {
+            list_del(&pos->list);
+            free(pos);
+            break;
+         }
+      }
+   }
+}
+
+void
+nvc0_init_bindless_functions(struct pipe_context *pipe) {
+   pipe->create_texture_handle = nve4_create_texture_handle;
+   pipe->delete_texture_handle = nve4_delete_texture_handle;
+   pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;
+
+   pipe->create_image_handle = nve4_create_image_handle;
+   pipe->delete_image_handle = nve4_delete_image_handle;
+   pipe->make_image_handle_resident = nve4_make_image_handle_resident;
+}
+
 
 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
 {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 4cd3712203..66de6d9e2f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -977,6 +977,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                         resident->flags);
    }
 
+   list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) {
+      nvc0_add_resident(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS, resident->buf,
+                        resident->flags);
+   }
+
    nvc0_state_validate_3d(nvc0, ~0);
 
    if (nvc0->vertprog->vp.need_draw_parameters && !info->indirect) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index db501afb9d..f641f4777d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -686,6 +686,11 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
                         resident->flags);
    }
 
+   list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) {
+      nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf,
+                        resident->flags);
+   }
+
    ret = !nve4_state_validate_cp(nvc0, ~0);
    if (ret)
       goto out;




More information about the mesa-commit mailing list