[Mesa-dev] [PATCH 01/11] nvc0: use a different offset for buffers and surfaces
Ilia Mirkin
imirkin at alum.mit.edu
Sat Feb 27 14:42:34 UTC 2016
Why wouldn't you have surfaces for other shader types? Surface == image. I
was thinking they would just use a fixed offset from suInfoBase, which I
repurposed for buffers.
Either way, why do you need to touch this now?
On Feb 27, 2016 9:02 AM, "Samuel Pitoiset" <samuel.pitoiset at gmail.com>
wrote:
> To avoid buffers and surfaces overwriting each other on Kepler, we need a
> different offset. This is currently only used for compute because we have
> to bind both surfaces and buffers.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
> .../drivers/nouveau/codegen/nv50_ir_driver.h | 1 +
> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 80
> ++++++++++++++++------
> .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 12 +++-
> src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 8 ++-
> 4 files changed, 73 insertions(+), 28 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 4504240..479e426 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -179,6 +179,7 @@ struct nv50_ir_prog_info
> uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface
> info */
> uint16_t texBindBase; /* base address for tex handles (nve4) */
> uint16_t suInfoBase; /* base address for surface info (nve4)
> */
> + uint16_t bufInfoBase; /* base address for buffer info */
> uint16_t sampleInfoBase; /* base address for sample positions */
> uint8_t msInfoCBSlot; /* cX[] used for multisample info */
> uint16_t msInfoBase; /* base address for multisample info */
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index d181f15..8abdd93 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1026,7 +1026,7 @@ bool
> NVC0LoweringPass::handleSUQ(Instruction *suq)
> {
> suq->op = OP_MOV;
> - suq->setSrc(0, loadResLength32(suq->getIndirect(0, 1),
> + suq->setSrc(0, loadBufLength32(suq->getIndirect(0, 1),
> suq->getSrc(0)->reg.fileIndex * 16));
> suq->setIndirect(0, 0, NULL);
> suq->setIndirect(0, 1, NULL);
> @@ -1142,7 +1142,7 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
> return true;
> default:
> assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
> - base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
> + base = loadBufInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
> assert(base->reg.size == 8);
> if (ptr)
> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
> @@ -1202,19 +1202,19 @@ NVC0LoweringPass::handleCasExch(Instruction *cas,
> bool needCctl)
> }
>
> inline Value *
> -NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
> +NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off, uint16_t base)
> {
> uint8_t b = prog->driver->io.resInfoCBSlot;
> - off += prog->driver->io.suInfoBase;
> + off += base;
> return bld.
> mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32,
> off), ptr);
> }
>
> inline Value *
> -NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
> +NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off, uint16_t base)
> {
> uint8_t b = prog->driver->io.resInfoCBSlot;
> - off += prog->driver->io.suInfoBase;
> + off += base;
>
> if (ptr)
> ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr,
> bld.mkImm(4));
> @@ -1224,10 +1224,10 @@ NVC0LoweringPass::loadResInfo64(Value *ptr,
> uint32_t off)
> }
>
> inline Value *
> -NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
> +NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off, uint16_t base)
> {
> uint8_t b = prog->driver->io.resInfoCBSlot;
> - off += prog->driver->io.suInfoBase;
> + off += base;
>
> if (ptr)
> ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr,
> bld.mkImm(4));
> @@ -1237,6 +1237,42 @@ NVC0LoweringPass::loadResLength32(Value *ptr,
> uint32_t off)
> }
>
> inline Value *
> +NVC0LoweringPass::loadSufInfo32(Value *ptr, uint32_t off)
> +{
> + return loadResInfo32(ptr, off, prog->driver->io.suInfoBase);
> +}
> +
> +inline Value *
> +NVC0LoweringPass::loadSufInfo64(Value *ptr, uint32_t off)
> +{
> + return loadResInfo64(ptr, off, prog->driver->io.suInfoBase);
> +}
> +
> +inline Value *
> +NVC0LoweringPass::loadSufLength32(Value *ptr, uint32_t off)
> +{
> + return loadResLength32(ptr, off, prog->driver->io.suInfoBase);
> +}
> +
> +inline Value *
> +NVC0LoweringPass::loadBufInfo32(Value *ptr, uint32_t off)
> +{
> + return loadResInfo32(ptr, off, prog->driver->io.bufInfoBase);
> +}
> +
> +inline Value *
> +NVC0LoweringPass::loadBufInfo64(Value *ptr, uint32_t off)
> +{
> + return loadResInfo64(ptr, off, prog->driver->io.bufInfoBase);
> +}
> +
> +inline Value *
> +NVC0LoweringPass::loadBufLength32(Value *ptr, uint32_t off)
> +{
> + return loadResLength32(ptr, off, prog->driver->io.bufInfoBase);
> +}
> +
> +inline Value *
> NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
> {
> uint8_t b = prog->driver->io.msInfoCBSlot;
> @@ -1316,8 +1352,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction
> *tex)
>
> Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
>
> - Value *ms_x = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(0));
> - Value *ms_y = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(1));
> + Value *ms_x = loadSufInfo32(NULL, base + NVE4_SU_INFO_MS(0));
> + Value *ms_y = loadSufInfo32(NULL, base + NVE4_SU_INFO_MS(1));
>
> bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
> bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
> @@ -1370,9 +1406,9 @@
> NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
> for (c = 0; c < arg; ++c) {
> src[c] = bld.getScratch();
> if (c == 0 && raw)
> - v = loadResInfo32(NULL, base + NVE4_SU_INFO_RAW_X);
> + v = loadSufInfo32(NULL, base + NVE4_SU_INFO_RAW_X);
> else
> - v = loadResInfo32(NULL, base + NVE4_SU_INFO_DIM(c));
> + v = loadSufInfo32(NULL, base + NVE4_SU_INFO_DIM(c));
> bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero)
> ->subOp = getSuClampSubOp(su, c);
> }
> @@ -1394,16 +1430,16 @@
> NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
> bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL,
> 0xffff));
> } else
> if (dim == 3) {
> - v = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
> + v = loadSufInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
> bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1])
> ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
>
> - v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH);
> + v = loadSufInfo32(NULL, base + NVE4_SU_INFO_PITCH);
> bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0])
> ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l
> } else {
> assert(dim == 2);
> - v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH);
> + v = loadSufInfo32(NULL, base + NVE4_SU_INFO_PITCH);
> bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0])
> ->subOp = su->tex.target.isArray() ?
> NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l
> u16l u16l
> @@ -1414,7 +1450,7 @@
> NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
> if (raw) {
> bf = src[0];
> } else {
> - v = loadResInfo32(NULL, base + NVE4_SU_INFO_FMT);
> + v = loadSufInfo32(NULL, base + NVE4_SU_INFO_FMT);
> bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero)
> ->subOp = NV50_IR_SUBOP_V1(7,6,8|2);
> }
> @@ -1431,7 +1467,7 @@
> NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
> case 2:
> z = off;
> if (!su->tex.target.isArray()) {
> - z = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
> + z = loadSufInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
> subOp = NV50_IR_SUBOP_SUBFM_3D;
> }
> break;
> @@ -1446,7 +1482,7 @@
> NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
> }
>
> // part 2
> - v = loadResInfo32(NULL, base + NVE4_SU_INFO_ADDR);
> + v = loadSufInfo32(NULL, base + NVE4_SU_INFO_ADDR);
>
> if (su->tex.target == TEX_TARGET_BUFFER) {
> eau = v;
> @@ -1455,7 +1491,7 @@
> NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
> }
> // add array layer offset
> if (su->tex.target.isArray()) {
> - v = loadResInfo32(NULL, base + NVE4_SU_INFO_ARRAY);
> + v = loadSufInfo32(NULL, base + NVE4_SU_INFO_ARRAY);
> if (dim == 1)
> bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau)
> ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32
> @@ -1495,7 +1531,7 @@
> NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
>
> // let's just set it 0 for raw access and hope it works
> v = raw ?
> - bld.mkImm(0) : loadResInfo32(NULL, base + NVE4_SU_INFO_FMT);
> + bld.mkImm(0) : loadSufInfo32(NULL, base + NVE4_SU_INFO_FMT);
>
> // get rid of old coordinate sources, make space for fmt info and
> predicate
> su->moveSources(arg, 3 - arg);
> @@ -1966,12 +2002,12 @@ NVC0LoweringPass::visit(Instruction *i)
> i->op = OP_VFETCH;
> } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
> Value *ind = i->getIndirect(0, 1);
> - Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex *
> 16);
> + Value *ptr = loadBufInfo64(ind, i->getSrc(0)->reg.fileIndex *
> 16);
> // XXX come up with a way not to do this for EVERY little access
> but
> // rather to batch these up somehow. Unfortunately we've lost the
> // information about the field width by the time we get here.
> Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset
> + typeSizeof(i->sType));
> - Value *length = loadResLength32(ind, i->getSrc(0)->reg.fileIndex
> * 16);
> + Value *length = loadBufLength32(ind, i->getSrc(0)->reg.fileIndex
> * 16);
> Value *pred = new_LValue(func, FILE_PREDICATE);
> if (i->src(0).isIndirect(0)) {
> bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> index 6eb8aff..3872f52 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> @@ -117,9 +117,15 @@ private:
>
> void readTessCoord(LValue *dst, int c);
>
> - Value *loadResInfo32(Value *ptr, uint32_t off);
> - Value *loadResInfo64(Value *ptr, uint32_t off);
> - Value *loadResLength32(Value *ptr, uint32_t off);
> + Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
> + Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
> + Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
> + Value *loadSufInfo32(Value *ptr, uint32_t off);
> + Value *loadSufInfo64(Value *ptr, uint32_t off);
> + Value *loadSufLength32(Value *ptr, uint32_t off);
> + Value *loadBufInfo32(Value *ptr, uint32_t off);
> + Value *loadBufInfo64(Value *ptr, uint32_t off);
> + Value *loadBufLength32(Value *ptr, uint32_t off);
> Value *loadMsInfo32(Value *ptr, uint32_t off);
> Value *loadTexHandle(Value *ptr, unsigned int slot);
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> index 89a7f5c..d01de73 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> @@ -544,22 +544,24 @@ nvc0_program_translate(struct nvc0_program *prog,
> uint16_t chipset,
> info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
> info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
> info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
> + info->io.bufInfoBase = 0; /* TODO */
> } else {
> info->io.resInfoCBSlot = 15;
> - info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
> + info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
> + info->io.suInfoBase = 0; /* TODO */
> }
> info->io.msInfoCBSlot = 0;
> info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
> } else {
> if (chipset >= NVISA_GK104_CHIPSET) {
> info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
> - info->io.suInfoBase = 0; /* TODO */
> }
> info->io.resInfoCBSlot = 15;
> info->io.sampleInfoBase = NVC0_CB_AUX_SAMPLE_INFO;
> - info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
> + info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
> info->io.msInfoCBSlot = 15;
> info->io.msInfoBase = 0; /* TODO */
> + info->io.suInfoBase = 0; /* TODO */
> }
>
> info->assignSlots = nvc0_program_assign_varying_slots;
> --
> 2.7.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160227/30e8b63d/attachment-0001.html>
More information about the mesa-dev
mailing list