[Mesa-dev] [RFC PATCH 3/3] nvc0: add bindless image support for kepler

Ilia Mirkin imirkin at alum.mit.edu
Sat Jun 24 16:01:40 UTC 2017


A part of the driver constbuf area is allocated for bindless images. Any
update requires uploading to all driver constbufs. This also extends the
driver constbuf to 64KB, up from 2KB.

As a side-effect, this also fixes indirect image access, as fetchSrc()
was done *after* the op was already created, making it be executed too
late. This change moves it further up.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h       |   1 +
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 168 ++++++++++++++++-----
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      |  57 +++----
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h        |   2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |   9 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |   1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |   7 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.h     |   6 +
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c        |  98 ++++++++++--
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c        |   5 +
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c    |   5 +
 11 files changed, 282 insertions(+), 77 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 76f08b1c3dc..58b7b8dcb6f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -178,6 +178,7 @@ struct nv50_ir_prog_info
       uint16_t texBindBase;      /* base address for tex handles (nve4) */
       uint16_t fbtexBindBase;    /* base address for fbtex handle (nve4) */
       uint16_t suInfoBase;       /* base address for surface info (nve4) */
+      uint16_t bindlessBase;     /* base address for bindless image info (nve4) */
       uint16_t bufInfoBase;      /* base address for buffer info */
       uint16_t sampleInfoBase;   /* base address for sample positions */
       uint8_t msInfoCBSlot;      /* cX[] used for multisample info */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index e19accbc8ea..a2f384a1227 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -20,6 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include "tgsi/tgsi_build.h"
 #include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_scan.h"
 #include "tgsi/tgsi_util.h"
@@ -163,6 +164,12 @@ public:
          return SrcRegister(fdr->Indirect);
       }
 
+      struct tgsi_full_src_register asSrc()
+      {
+         assert(fdr);
+         return tgsi_full_src_register_from_dst(fdr);
+      }
+
       int getArrayId() const
       {
          if (isIndirect(0))
@@ -1514,13 +1521,6 @@ void Source::scanInstructionSrc(const Instruction& insn,
       if (src.isIndirect(0))
          indirectTempArrays.insert(src.getArrayId());
    } else
-   if (src.getFile() == TGSI_FILE_BUFFER ||
-       src.getFile() == TGSI_FILE_IMAGE ||
-       (src.getFile() == TGSI_FILE_MEMORY &&
-        memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
-      info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
-         0x1 : 0x2;
-   } else
    if (src.getFile() == TGSI_FILE_OUTPUT) {
       if (src.isIndirect(0)) {
          // We don't know which one is accessed, just mark everything for
@@ -1591,6 +1591,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
          if (isEdgeFlagPassthrough(insn))
             info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
       } else
+      if (dst.getFile() != TGSI_FILE_MEMORY &&
+          insn.getOpcode() == TGSI_OPCODE_STORE) {
+         info->io.globalAccess |= 0x2;
+      } else
       if (dst.getFile() == TGSI_FILE_TEMPORARY) {
          if (dst.isIndirect(0))
             indirectTempArrays.insert(dst.getArrayId());
@@ -1603,6 +1607,29 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
       }
    }
 
+   if (insn.srcCount() && (
+             insn.getSrc(0).getFile() != TGSI_FILE_MEMORY ||
+             memoryFiles[insn.getSrc(0).getIndex(0)].mem_type ==
+             TGSI_MEMORY_TYPE_GLOBAL)) {
+      switch (insn.getOpcode()) {
+      case TGSI_OPCODE_ATOMUADD:
+      case TGSI_OPCODE_ATOMXCHG:
+      case TGSI_OPCODE_ATOMCAS:
+      case TGSI_OPCODE_ATOMAND:
+      case TGSI_OPCODE_ATOMOR:
+      case TGSI_OPCODE_ATOMXOR:
+      case TGSI_OPCODE_ATOMUMIN:
+      case TGSI_OPCODE_ATOMIMIN:
+      case TGSI_OPCODE_ATOMUMAX:
+      case TGSI_OPCODE_ATOMIMAX:
+      case TGSI_OPCODE_LOAD:
+         info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+            0x1 : 0x2;
+         break;
+      }
+   }
+
+
    for (unsigned s = 0; s < insn.srcCount(); ++s)
       scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));
 
@@ -1659,6 +1686,7 @@ private:
    Value *getOutputBase(int s);
    DataArray *getArrayForFile(unsigned file, int idx);
    Value *fetchSrc(int s, int c);
+   Value *fetchDst(int d, int c);
    Value *acquireDst(int d, int c);
    void storeDst(int d, int c, Value *);
 
@@ -1967,6 +1995,47 @@ Converter::fetchSrc(int s, int c)
    return applySrcMod(res, s, c);
 }
 
+Value *
+Converter::fetchDst(int d, int c)
+{
+   Value *res;
+   Value *ptr = NULL, *dimRel = NULL;
+
+   tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
+
+   if (dst.isIndirect(0))
+      ptr = fetchSrc(dst.getIndirect(0), 0, NULL);
+
+   if (dst.is2D()) {
+      switch (dst.getFile()) {
+      case TGSI_FILE_OUTPUT:
+         assert(0); // TODO
+         dimRel = NULL;
+         break;
+      case TGSI_FILE_INPUT:
+         assert(0); // TODO
+         dimRel = NULL;
+         break;
+      case TGSI_FILE_CONSTANT:
+         // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
+         if (dst.isIndirect(1))
+            dimRel = fetchSrc(dst.getIndirect(1), 0, 0);
+         break;
+      default:
+         break;
+      }
+   }
+
+   struct tgsi_full_src_register fsr = dst.asSrc();
+   tgsi::Instruction::SrcRegister src(&fsr);
+   res = fetchSrc(src, c, ptr);
+
+   if (dimRel)
+      res->getInsn()->setIndirect(0, 1, dimRel);
+
+   return res;
+}
+
 Converter::DataArray *
 Converter::getArrayForFile(unsigned file, int idx)
 {
@@ -2652,7 +2721,7 @@ Converter::handleLOAD(Value *dst0[4])
             ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
       }
       break;
-   case TGSI_FILE_IMAGE: {
+   default: {
       getImageCoords(off, 1);
       def.resize(4);
 
@@ -2663,23 +2732,32 @@ Converter::handleLOAD(Value *dst0[4])
             def[c] = dst0[c];
       }
 
+      bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
+      Value *ind = NULL;
+      if (bindless)
+         ind = fetchSrc(0, 0);
+      else if (tgsi.getSrc(0).isIndirect(0))
+         ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL);
+
       TexInstruction *ld =
-         mkTex(OP_SULDP, tgsi.getImageTarget(), r, 0, def, off);
+         mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off);
       ld->tex.mask = tgsi.getDst(0).getMask();
       ld->tex.format = tgsi.getImageFormat();
       ld->cache = tgsi.getCacheMode();
-      if (tgsi.getSrc(0).isIndirect(0))
-         ld->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+      ld->tex.bindless = bindless;
+      if (!bindless)
+         ld->tex.r = r;
+      if (ind)
+         ld->setIndirectR(ind);
 
       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
          if (dst0[c] != def[c])
             mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
-      }
       break;
-   default:
-      assert(!"Unsupported srcFile for LOAD");
+   }
    }
 
+
 /* Keep this around for now as reference when adding img support
    getResourceCoords(off, r, 1);
 
@@ -2783,24 +2861,33 @@ Converter::handleSTORE()
             st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
       }
       break;
-   case TGSI_FILE_IMAGE: {
+   default: {
       getImageCoords(off, 0);
       src = off;
 
       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
          src.push_back(fetchSrc(1, c));
 
+      bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE;
+      Value *ind = NULL;
+      if (bindless)
+         ind = fetchDst(0, 0);
+      else if (tgsi.getDst(0).isIndirect(0))
+         ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, NULL);
+
       TexInstruction *st =
-         mkTex(OP_SUSTP, tgsi.getImageTarget(), r, 0, dummy, src);
+         mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src);
       st->tex.mask = tgsi.getDst(0).getMask();
       st->tex.format = tgsi.getImageFormat();
       st->cache = tgsi.getCacheMode();
-      if (tgsi.getDst(0).isIndirect(0))
-         st->setIndirectR(fetchSrc(tgsi.getDst(0).getIndirect(0), 0, NULL));
-      }
+      st->tex.bindless = bindless;
+      if (!bindless)
+         st->tex.r = r;
+      if (ind)
+         st->setIndirectR(ind);
+
       break;
-   default:
-      assert(!"Unsupported dstFile for STORE");
+   }
    }
 
 /* Keep this around for now as reference when adding img support
@@ -2897,7 +2984,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
          if (dst0[c])
             dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
       break;
-   case TGSI_FILE_IMAGE: {
+   default: {
       getImageCoords(srcv, 1);
       defv.push_back(dst);
       srcv.push_back(fetchSrc(2, 0));
@@ -2905,22 +2992,30 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
       if (subOp == NV50_IR_SUBOP_ATOM_CAS)
          srcv.push_back(fetchSrc(3, 0));
 
+      bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
+      Value *ind = NULL;
+      if (bindless)
+         ind = fetchSrc(0, 0);
+      else if (tgsi.getSrc(0).isIndirect(0))
+         ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL);
+
       TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(),
-                                  r, 0, defv, srcv);
+                                  0, 0, defv, srcv);
       tex->subOp = subOp;
       tex->tex.mask = 1;
       tex->tex.format = tgsi.getImageFormat();
       tex->setType(ty);
-      if (tgsi.getSrc(0).isIndirect(0))
-         tex->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+      tex->tex.bindless = bindless;
+      if (!bindless)
+         tex->tex.r = r;
+      if (ind)
+         tex->setIndirectR(ind);
 
       for (int c = 0; c < 4; ++c)
          if (dst0[c])
             dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
-      }
       break;
-   default:
-      assert(!"Unsupported srcFile for ATOM");
+   }
    }
 
 /* Keep this around for now as reference when adding img support
@@ -3123,7 +3218,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
 
    unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
 
-   if (tgsi.dstCount()) {
+   if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
       for (c = 0; c < 4; ++c) {
          rDst0[c] = acquireDst(0, c);
          dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
@@ -3792,8 +3887,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
             geni->setIndirect(0, 1,
                               fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
       } else {
-         assert(tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE);
-
          TexInstruction *texi = new_TexInstruction(func, OP_SUQ);
          for (int c = 0, d = 0; c < 4; ++c) {
             if (dst0[c]) {
@@ -3801,12 +3894,17 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
                texi->tex.mask |= 1 << c;
             }
          }
-         texi->tex.r = tgsi.getSrc(0).getIndex(0);
+         if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) {
+            texi->tex.r = tgsi.getSrc(0).getIndex(0);
+            if (tgsi.getSrc(0).isIndirect(0))
+               texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+         } else {
+            texi->tex.bindless = true;
+            texi->setIndirectR(fetchSrc(0, 0));
+         }
          texi->tex.target = tgsi.getImageTarget();
          bb->insertTail(texi);
 
-         if (tgsi.getSrc(0).isIndirect(0))
-            texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
       }
       break;
    case TGSI_OPCODE_IBFE:
@@ -4159,7 +4257,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       break;
    }
 
-   if (tgsi.dstCount()) {
+   if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
       for (c = 0; c < 4; ++c) {
          if (!dst0[c])
             continue;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 93c579d2540..e3933ed09c7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1694,19 +1694,23 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
 #define NVC0_SU_INFO_MS(i)   (0x38 + (i) * 4)
 
 inline Value *
-NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off)
+NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless)
 {
    uint32_t base = slot * NVC0_SU_INFO__STRIDE;
 
    if (ptr) {
       ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot));
-      ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7));
+      if (bindless)
+         ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(511));
+      else
+         ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7));
       ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(6));
       base = 0;
    }
    off += base;
 
-   return loadResInfo32(ptr, off, prog->driver->io.suInfoBase);
+   return loadResInfo32(ptr, off, bindless ? prog->driver->io.bindlessBase :
+                        prog->driver->io.suInfoBase);
 }
 
 static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
@@ -1752,7 +1756,7 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq)
       } else {
          offset = NVC0_SU_INFO_SIZE(c);
       }
-      bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset));
+      bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset, suq->tex.bindless));
       if (c == 2 && suq->tex.target.isCube())
          bld.mkOp2(OP_DIV, TYPE_U32, suq->getDef(d - 1), suq->getDef(d - 1),
                    bld.loadImm(NULL, 6));
@@ -1760,8 +1764,8 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq)
 
    if (mask & 1) {
       if (suq->tex.target.isMS()) {
-         Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0));
-         Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1));
+         Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless);
+         Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless);
          Value *ms = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), ms_x, ms_y);
          bld.mkOp2(OP_SHL, TYPE_U32, suq->getDef(d++), bld.loadImm(NULL, 1), ms);
       } else {
@@ -1794,8 +1798,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
    Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
    Value *ind = tex->getIndirectR();
 
-   Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0));
-   Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1));
+   Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
+   Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
 
    bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
    bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
@@ -1855,9 +1859,9 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
 
       src[c] = bld.getScratch();
       if (c == 0 && raw)
-         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X);
+         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X, su->tex.bindless);
       else
-         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc));
+         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc), su->tex.bindless);
       bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero)
          ->subOp = getSuClampSubOp(su, dimc);
    }
@@ -1879,16 +1883,16 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
          bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff));
    } else
    if (dim == 3) {
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
       bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1])
          ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
 
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless);
       bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0])
          ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l
    } else {
       assert(dim == 2);
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless);
       bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0])
          ->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ?
          NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
@@ -1899,7 +1903,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
       if (raw) {
          bf = src[0];
       } else {
-         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT);
+         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless);
          bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero)
             ->subOp = NV50_IR_SUBOP_V1(7,6,8|2);
       }
@@ -1916,7 +1920,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
       case 2:
          z = off;
          if (!su->tex.target.isArray() && !su->tex.target.isCube()) {
-            z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C);
+            z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
             subOp = NV50_IR_SUBOP_SUBFM_3D;
          }
          break;
@@ -1931,7 +1935,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
    }
 
    // part 2
-   v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR);
+   v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless);
 
    if (su->tex.target == TEX_TARGET_BUFFER) {
       eau = v;
@@ -1940,7 +1944,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
    }
    // add array layer offset
    if (su->tex.target.isArray() || su->tex.target.isCube()) {
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless);
       if (dim == 1)
          bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau)
             ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32
@@ -1980,7 +1984,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
 
    // let's just set it 0 for raw access and hope it works
    v = raw ?
-      bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT);
+      bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless);
 
    // get rid of old coordinate sources, make space for fmt info and predicate
    su->moveSources(arg, 3 - arg);
@@ -1988,12 +1992,13 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
    su->setSrc(0, addr);
    su->setSrc(1, v);
    su->setSrc(2, pred);
+   su->setIndirectR(NULL);
 
    // prevent read fault when the image is not actually bound
    CmpInstruction *pred1 =
       bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
                 TYPE_U32, bld.mkImm(0),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR));
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
 
    if (su->op != OP_SUSTP && su->tex.format) {
       const TexInstruction::ImgFormatDesc *format = su->tex.format;
@@ -2004,7 +2009,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
       assert(format->components != 0);
       bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred1->getDef(0),
                 TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
                 pred1->getDef(0));
    }
    su->setPredicate(CC_NOT_P, pred1->getDef(0));
@@ -2199,13 +2204,13 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
 
    // calculate pixel offset
    if (su->op == OP_SULDP || su->op == OP_SUREDP) {
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless);
       su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v));
    }
 
    // add array layer offset
    if (su->tex.target.isArray() || su->tex.target.isCube()) {
-      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY);
+      v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless);
       assert(dim > 1);
       su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v));
    }
@@ -2214,7 +2219,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
    CmpInstruction *pred =
       bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
                 TYPE_U32, bld.mkImm(0),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR));
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
    if (su->op != OP_SUSTP && su->tex.format) {
       const TexInstruction::ImgFormatDesc *format = su->tex.format;
       int blockwidth = format->bits[0] + format->bits[1] +
@@ -2224,7 +2229,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
       // make sure that the format doesn't mismatch when it's not FMT_NONE
       bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
                 TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
                 pred->getDef(0));
    }
    su->setPredicate(CC_NOT_P, pred->getDef(0));
@@ -2313,7 +2318,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
    CmpInstruction *pred =
       bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
                 TYPE_U32, bld.mkImm(0),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR));
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
    if (su->op != OP_SUSTP && su->tex.format) {
       const TexInstruction::ImgFormatDesc *format = su->tex.format;
       int blockwidth = format->bits[0] + format->bits[1] +
@@ -2323,7 +2328,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
       // make sure that the format doesn't mismatch when it's not FMT_NONE
       bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
                 TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
-                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
                 pred->getDef(0));
    }
    su->setPredicate(CC_NOT_P, pred->getDef(0));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 7fae7e24b99..37d52976657 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -130,7 +130,7 @@ private:
    Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
    Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
    Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
-   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off);
+   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
    Value *loadBufInfo64(Value *ptr, uint32_t off);
    Value *loadBufLength32(Value *ptr, uint32_t off);
    Value *loadUboInfo64(Value *ptr, uint32_t off);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 0538a50a71b..8f5428ad059 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -108,9 +108,9 @@
 /* 6 user uniform buffers, at 64K each */
 #define NVC0_CB_USR_INFO(s)         (s << 16)
 #define NVC0_CB_USR_SIZE            (6 << 16)
-/* 6 driver constbuts, at 2K each */
-#define NVC0_CB_AUX_INFO(s)         NVC0_CB_USR_SIZE + (s << 11)
-#define NVC0_CB_AUX_SIZE            (1 << 11)
+/* 6 driver constbuts, at 64K each */
+#define NVC0_CB_AUX_INFO(s)         NVC0_CB_USR_SIZE + (s << 16)
+#define NVC0_CB_AUX_SIZE            (1 << 16)
 /* XXX: Figure out what this UNK data is. */
 #define NVC0_CB_AUX_UNK_INFO        0x000
 #define NVC0_CB_AUX_UNK_SIZE        (8 * 4)
@@ -146,6 +146,9 @@
 /* 1 64-bits address and 1 32-bits sequence */
 #define NVC0_CB_AUX_MP_INFO         0x620
 #define NVC0_CB_AUX_MP_SIZE         3 * 4
+/* 512 64-byte blocks for bindless image handles */
+#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4
+#define NVC0_CB_AUX_BINDLESS_SIZE   (NVE4_IMG_MAX_HANDLES * 16 * 4)
 /* 4 32-bits floats for the vertex runout, put at the end */
 #define NVC0_CB_AUX_RUNOUT_INFO     NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index e43a8de9f59..0109ca17f59 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -590,6 +590,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
    if (info->target >= NVISA_GK104_CHIPSET) {
       info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
       info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
+      info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0);
    }
 
    if (prog->type == PIPE_SHADER_COMPUTE) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 3970c46a1ff..fb915577d79 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -1252,8 +1252,11 @@ nvc0_screen_create(struct nouveau_device *dev)
 
    PUSH_KICK (push);
 
-   screen->tic.entries = CALLOC(4096, sizeof(void *));
-   screen->tsc.entries = screen->tic.entries + 2048;
+   screen->tic.entries = CALLOC(
+         NVC0_TIC_MAX_ENTRIES + NVC0_TSC_MAX_ENTRIES + NVE4_IMG_MAX_HANDLES,
+         sizeof(void *));
+   screen->tsc.entries = screen->tic.entries + NVC0_TIC_MAX_ENTRIES;
+   screen->img.entries = (void *)screen->tsc.entries + NVC0_TSC_MAX_ENTRIES;
 
    if (!nvc0_blitter_create(screen))
       goto fail;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index e3b1d5dbcbf..c28150df382 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -13,6 +13,7 @@
 
 #define NVC0_TIC_MAX_ENTRIES 2048
 #define NVC0_TSC_MAX_ENTRIES 2048
+#define NVE4_IMG_MAX_HANDLES 512
 
 /* doesn't count driver-reserved slot */
 #define NVC0_MAX_PIPE_CONSTBUFS         15
@@ -99,6 +100,11 @@ struct nvc0_screen {
    } tsc;
 
    struct {
+      struct pipe_image_view **entries;
+      int next;
+   } img;
+
+   struct {
       struct nouveau_bo *bo;
       uint32_t *map;
    } fence;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index ab8515ea7e0..18ba9f5250f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -840,16 +840,6 @@ nve4_make_texture_handle_resident(struct pipe_context *pipe,
    }
 }
 
-void
-nvc0_init_bindless_functions(struct pipe_context *pipe) {
-   list_inithead(&nvc0_context(pipe)->tex_head);
-   list_inithead(&nvc0_context(pipe)->img_head);
-
-   pipe->create_texture_handle = nve4_create_texture_handle;
-   pipe->delete_texture_handle = nve4_delete_texture_handle;
-   pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;
-}
-
 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
 static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
@@ -1286,6 +1276,94 @@ nvc0_validate_surfaces(struct nvc0_context *nvc0)
    }
 }
 
+static uint64_t
+nve4_create_image_handle(struct pipe_context *pipe,
+                         const struct pipe_image_view *view)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   int i = screen->img.next, s;
+
+   while (screen->img.entries[i]) {
+      i = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
+      if (i == screen->img.next)
+         return 0;
+   }
+
+   screen->img.next = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
+   screen->img.entries[i] = calloc(1, sizeof(struct pipe_image_view));
+   *screen->img.entries[i] = *view;
+
+   for (s = 0; s < 6; s++) {
+      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
+      PUSH_DATA (push, NVC0_CB_AUX_BINDLESS_INFO(i));
+      nve4_set_surface_info(push, view, nvc0);
+   }
+
+   return 0x100000000ULL | i;
+}
+
+static void
+nve4_delete_image_handle(struct pipe_context *pipe, uint64_t handle)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nvc0->screen;
+   int i = handle & (NVE4_IMG_MAX_HANDLES - 1);
+
+   free(screen->img.entries[i]);
+   screen->img.entries[i] = NULL;
+}
+
+static void
+nve4_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,
+                                unsigned access, bool resident)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nvc0->screen;
+
+   if (resident) {
+      struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
+      struct pipe_image_view *view =
+         screen->img.entries[handle & (NVE4_IMG_MAX_HANDLES - 1)];
+      assert(view);
+
+      if (view->resource->target == PIPE_BUFFER &&
+          access & PIPE_IMAGE_ACCESS_WRITE)
+         nvc0_mark_image_range_valid(view);
+      res->handle = handle;
+      res->buf = nv04_resource(view->resource);
+      res->flags = (access & 3) << 8;
+      list_add(&res->list, &nvc0->img_head);
+   } else {
+      list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {
+         if (pos->handle == handle) {
+            list_del(&pos->list);
+            free(pos);
+            break;
+         }
+      }
+   }
+}
+
+void
+nvc0_init_bindless_functions(struct pipe_context *pipe) {
+   list_inithead(&nvc0_context(pipe)->tex_head);
+   list_inithead(&nvc0_context(pipe)->img_head);
+
+   pipe->create_texture_handle = nve4_create_texture_handle;
+   pipe->delete_texture_handle = nve4_delete_texture_handle;
+   pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;
+
+   pipe->create_image_handle = nve4_create_image_handle;
+   pipe->delete_image_handle = nve4_delete_image_handle;
+   pipe->make_image_handle_resident = nve4_make_image_handle_resident;
+}
+
 
 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
 {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index e306916e5fc..2856b4c6096 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -980,6 +980,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                         resident->flags);
    }
 
+   list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) {
+      nvc0_add_resident(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS, resident->buf,
+                        resident->flags);
+   }
+
    nvc0_state_validate_3d(nvc0, ~0);
 
    if (nvc0->vertprog->vp.need_draw_parameters && !info->indirect) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 54c7965c105..fe33b369688 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -710,6 +710,11 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
                         resident->flags);
    }
 
+   list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) {
+      nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf,
+                        resident->flags);
+   }
+
 #ifdef DEBUG
    if (debug_get_num_option("NV50_PROG_DEBUG", 0)) {
       if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
-- 
2.13.0



More information about the mesa-dev mailing list