[Mesa-dev] [PATCH 01/11] nvc0: use a different offset for buffers and surfaces

Sat Feb 27 14:50:45 UTC 2016

On 02/27/2016 03:42 PM, Ilia Mirkin wrote:
> Why wouldn't you have surfaces for other shader types? Surface == image.
> I was thinking they would just use a fixed offset from the suinfobase
> which I repurposed for buffers.
>
> Either way, why do you need to touch this now?

Because for compute on Kepler I need to bind both images and buffers,
and if they share the same offset they will overwrite each other.

>
> On Feb 27, 2016 9:02 AM, "Samuel Pitoiset" <samuel.pitoiset at gmail.com
> <mailto:samuel.pitoiset at gmail.com>> wrote:
>
>     To avoid buffers and surfaces overwriting each other on Kepler, we need
>     a different offset for each. This is currently only used for compute,
>     because that is where we have to bind both surfaces and buffers.
>
>     Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com
>     <mailto:samuel.pitoiset at gmail.com>>
>     ---
>       .../drivers/nouveau/codegen/nv50_ir_driver.h       |  1 +
>       .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 80
>     ++++++++++++++++------
>       .../nouveau/codegen/nv50_ir_lowering_nvc0.h        | 12 +++-
>       src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |  8 ++-
>       4 files changed, 73 insertions(+), 28 deletions(-)
>
>     diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
>     b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
>     index 4504240..479e426 100644
>     --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
>     +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
>     @@ -179,6 +179,7 @@ struct nv50_ir_prog_info
>             uint8_t resInfoCBSlot;     /* cX[] used for tex handles,
>     surface info */
>             uint16_t texBindBase;      /* base address for tex handles
>     (nve4) */
>             uint16_t suInfoBase;       /* base address for surface info
>     (nve4) */
>     +      uint16_t bufInfoBase;      /* base address for buffer info */
>             uint16_t sampleInfoBase;   /* base address for sample
>     positions */
>             uint8_t msInfoCBSlot;      /* cX[] used for multisample info */
>             uint16_t msInfoBase;       /* base address for multisample
>     info */
>     diff --git
>     a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>     b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>     index d181f15..8abdd93 100644
>     --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>     +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>     @@ -1026,7 +1026,7 @@ bool
>       NVC0LoweringPass::handleSUQ(Instruction *suq)
>       {
>          suq->op = OP_MOV;
>     -   suq->setSrc(0, loadResLength32(suq->getIndirect(0, 1),
>     +   suq->setSrc(0, loadBufLength32(suq->getIndirect(0, 1),
>                                         suq->getSrc(0)->reg.fileIndex *
>     16));
>          suq->setIndirect(0, 0, NULL);
>          suq->setIndirect(0, 1, NULL);
>     @@ -1142,7 +1142,7 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
>             return true;
>          default:
>             assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
>     -      base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
>     +      base = loadBufInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
>             assert(base->reg.size == 8);
>             if (ptr)
>                base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
>     @@ -1202,19 +1202,19 @@ NVC0LoweringPass::handleCasExch(Instruction
>     *cas, bool needCctl)
>       }
>
>       inline Value *
>     -NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
>     +NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off, uint16_t
>     base)
>       {
>          uint8_t b = prog->driver->io.resInfoCBSlot;
>     -   off += prog->driver->io.suInfoBase;
>     +   off += base;
>          return bld.
>             mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b,
>     TYPE_U32, off), ptr);
>       }
>
>       inline Value *
>     -NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
>     +NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off, uint16_t
>     base)
>       {
>          uint8_t b = prog->driver->io.resInfoCBSlot;
>     -   off += prog->driver->io.suInfoBase;
>     +   off += base;
>
>          if (ptr)
>             ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr,
>     bld.mkImm(4));
>     @@ -1224,10 +1224,10 @@ NVC0LoweringPass::loadResInfo64(Value *ptr,
>     uint32_t off)
>       }
>
>       inline Value *
>     -NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
>     +NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off,
>     uint16_t base)
>       {
>          uint8_t b = prog->driver->io.resInfoCBSlot;
>     -   off += prog->driver->io.suInfoBase;
>     +   off += base;
>
>          if (ptr)
>             ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr,
>     bld.mkImm(4));
>     @@ -1237,6 +1237,42 @@ NVC0LoweringPass::loadResLength32(Value *ptr,
>     uint32_t off)
>       }
>
>       inline Value *
>     +NVC0LoweringPass::loadSufInfo32(Value *ptr, uint32_t off)
>     +{
>     +   return loadResInfo32(ptr, off, prog->driver->io.suInfoBase);
>     +}
>     +
>     +inline Value *
>     +NVC0LoweringPass::loadSufInfo64(Value *ptr, uint32_t off)
>     +{
>     +   return loadResInfo64(ptr, off, prog->driver->io.suInfoBase);
>     +}
>     +
>     +inline Value *
>     +NVC0LoweringPass::loadSufLength32(Value *ptr, uint32_t off)
>     +{
>     +   return loadResLength32(ptr, off, prog->driver->io.suInfoBase);
>     +}
>     +
>     +inline Value *
>     +NVC0LoweringPass::loadBufInfo32(Value *ptr, uint32_t off)
>     +{
>     +   return loadResInfo32(ptr, off, prog->driver->io.bufInfoBase);
>     +}
>     +
>     +inline Value *
>     +NVC0LoweringPass::loadBufInfo64(Value *ptr, uint32_t off)
>     +{
>     +   return loadResInfo64(ptr, off, prog->driver->io.bufInfoBase);
>     +}
>     +
>     +inline Value *
>     +NVC0LoweringPass::loadBufLength32(Value *ptr, uint32_t off)
>     +{
>     +   return loadResLength32(ptr, off, prog->driver->io.bufInfoBase);
>     +}
>     +
>     +inline Value *
>       NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
>       {
>          uint8_t b = prog->driver->io.msInfoCBSlot;
>     @@ -1316,8 +1352,8 @@
>     NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
>
>          Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
>
>     -   Value *ms_x = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(0));
>     -   Value *ms_y = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(1));
>     +   Value *ms_x = loadSufInfo32(NULL, base + NVE4_SU_INFO_MS(0));
>     +   Value *ms_y = loadSufInfo32(NULL, base + NVE4_SU_INFO_MS(1));
>
>          bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
>          bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
>     @@ -1370,9 +1406,9 @@
>     NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
>          for (c = 0; c < arg; ++c) {
>             src[c] = bld.getScratch();
>             if (c == 0 && raw)
>     -         v = loadResInfo32(NULL, base + NVE4_SU_INFO_RAW_X);
>     +         v = loadSufInfo32(NULL, base + NVE4_SU_INFO_RAW_X);
>             else
>     -         v = loadResInfo32(NULL, base + NVE4_SU_INFO_DIM(c));
>     +         v = loadSufInfo32(NULL, base + NVE4_SU_INFO_DIM(c));
>             bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero)
>                ->subOp = getSuClampSubOp(su, c);
>          }
>     @@ -1394,16 +1430,16 @@
>     NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
>                bld.mkOp2(OP_AND, TYPE_U32, off, src[0],
>     bld.loadImm(NULL, 0xffff));
>          } else
>          if (dim == 3) {
>     -      v = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
>     +      v = loadSufInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
>             bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1])
>                ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
>
>     -      v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH);
>     +      v = loadSufInfo32(NULL, base + NVE4_SU_INFO_PITCH);
>             bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0])
>                ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l
>          } else {
>             assert(dim == 2);
>     -      v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH);
>     +      v = loadSufInfo32(NULL, base + NVE4_SU_INFO_PITCH);
>             bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0])
>                ->subOp = su->tex.target.isArray() ?
>                NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); //
>     u16l u16l u16l
>     @@ -1414,7 +1450,7 @@
>     NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
>             if (raw) {
>                bf = src[0];
>             } else {
>     -         v = loadResInfo32(NULL, base + NVE4_SU_INFO_FMT);
>     +         v = loadSufInfo32(NULL, base + NVE4_SU_INFO_FMT);
>                bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero)
>                   ->subOp = NV50_IR_SUBOP_V1(7,6,8|2);
>             }
>     @@ -1431,7 +1467,7 @@
>     NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
>             case 2:
>                z = off;
>                if (!su->tex.target.isArray()) {
>     -            z = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
>     +            z = loadSufInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
>                   subOp = NV50_IR_SUBOP_SUBFM_3D;
>                }
>                break;
>     @@ -1446,7 +1482,7 @@
>     NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
>          }
>
>          // part 2
>     -   v = loadResInfo32(NULL, base + NVE4_SU_INFO_ADDR);
>     +   v = loadSufInfo32(NULL, base + NVE4_SU_INFO_ADDR);
>
>          if (su->tex.target == TEX_TARGET_BUFFER) {
>             eau = v;
>     @@ -1455,7 +1491,7 @@
>     NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
>          }
>          // add array layer offset
>          if (su->tex.target.isArray()) {
>     -      v = loadResInfo32(NULL, base + NVE4_SU_INFO_ARRAY);
>     +      v = loadSufInfo32(NULL, base + NVE4_SU_INFO_ARRAY);
>             if (dim == 1)
>                bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau)
>                   ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32
>     @@ -1495,7 +1531,7 @@
>     NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
>
>          // let's just set it 0 for raw access and hope it works
>          v = raw ?
>     -      bld.mkImm(0) : loadResInfo32(NULL, base + NVE4_SU_INFO_FMT);
>     +      bld.mkImm(0) : loadSufInfo32(NULL, base + NVE4_SU_INFO_FMT);
>
>          // get rid of old coordinate sources, make space for fmt info
>     and predicate
>          su->moveSources(arg, 3 - arg);
>     @@ -1966,12 +2002,12 @@ NVC0LoweringPass::visit(Instruction *i)
>                i->op = OP_VFETCH;
>             } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
>                Value *ind = i->getIndirect(0, 1);
>     -         Value *ptr = loadResInfo64(ind,
>     i->getSrc(0)->reg.fileIndex * 16);
>     +         Value *ptr = loadBufInfo64(ind,
>     i->getSrc(0)->reg.fileIndex * 16);
>                // XXX come up with a way not to do this for EVERY little
>     access but
>                // rather to batch these up somehow. Unfortunately we've
>     lost the
>                // information about the field width by the time we get here.
>                Value *offset = bld.loadImm(NULL,
>     i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
>     -         Value *length = loadResLength32(ind,
>     i->getSrc(0)->reg.fileIndex * 16);
>     +         Value *length = loadBufLength32(ind,
>     i->getSrc(0)->reg.fileIndex * 16);
>                Value *pred = new_LValue(func, FILE_PREDICATE);
>                if (i->src(0).isIndirect(0)) {
>                   bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr,
>     i->getIndirect(0, 0));
>     diff --git
>     a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
>     b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
>     index 6eb8aff..3872f52 100644
>     --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
>     +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
>     @@ -117,9 +117,15 @@ private:
>
>          void readTessCoord(LValue *dst, int c);
>
>     -   Value *loadResInfo32(Value *ptr, uint32_t off);
>     -   Value *loadResInfo64(Value *ptr, uint32_t off);
>     -   Value *loadResLength32(Value *ptr, uint32_t off);
>     +   Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
>     +   Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
>     +   Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
>     +   Value *loadSufInfo32(Value *ptr, uint32_t off);
>     +   Value *loadSufInfo64(Value *ptr, uint32_t off);
>     +   Value *loadSufLength32(Value *ptr, uint32_t off);
>     +   Value *loadBufInfo32(Value *ptr, uint32_t off);
>     +   Value *loadBufInfo64(Value *ptr, uint32_t off);
>     +   Value *loadBufLength32(Value *ptr, uint32_t off);
>          Value *loadMsInfo32(Value *ptr, uint32_t off);
>          Value *loadTexHandle(Value *ptr, unsigned int slot);
>
>     diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>     b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>     index 89a7f5c..d01de73 100644
>     --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>     +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>     @@ -544,22 +544,24 @@ nvc0_program_translate(struct nvc0_program
>     *prog, uint16_t chipset,
>                info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
>                info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
>                info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
>     +         info->io.bufInfoBase = 0; /* TODO */
>             } else {
>                info->io.resInfoCBSlot = 15;
>     -         info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
>     +         info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
>     +         info->io.suInfoBase = 0; /* TODO */
>             }
>             info->io.msInfoCBSlot = 0;
>             info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
>          } else {
>             if (chipset >= NVISA_GK104_CHIPSET) {
>                info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
>     -         info->io.suInfoBase = 0; /* TODO */
>             }
>             info->io.resInfoCBSlot = 15;
>             info->io.sampleInfoBase = NVC0_CB_AUX_SAMPLE_INFO;
>     -      info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
>     +      info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
>             info->io.msInfoCBSlot = 15;
>             info->io.msInfoBase = 0; /* TODO */
>     +      info->io.suInfoBase = 0; /* TODO */
>          }
>
>          info->assignSlots = nvc0_program_assign_varying_slots;
>     --
>     2.7.1
>
>     _______________________________________________
>     mesa-dev mailing list
>     mesa-dev at lists.freedesktop.org <mailto:mesa-dev at lists.freedesktop.org>
>     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>