[Mesa-dev] [PATCH v3] nvc0: fix bindless multisampled images on Maxwell+

Sat Sep 22 15:08:01 UTC 2018

On Mon, Sep 17, 2018 at 12:19 PM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
> NVC0_CB_AUX_BINDLESS_INFO isn't written to on Maxwell+ and it's too small
> anyway.
>
> With these changes, TXQ is used to determine the number of samples, and
> the coordinate adjustments are calculated from it in the shader.
>
> v2: rework to use TXQ and a small array instead of a larger array with an
>     entry for each texture
> v3: get rid of the small array and calculate the adjustments in the shader
>
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> Fixes: c2ae9b40527 ('nvc0: implement multisampled images on Maxwell+')
> ---
>  .../codegen/nv50_ir_lowering_gm107.cpp        |  4 +-
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 43 ++++++++++++++++++-
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.h   |  3 +-
>  3 files changed, 45 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> index c7436e2e29..49a5f3b01f 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> @@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq)
>
>        if (mask & 0x1)
>           bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
> -                   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless));
> +                   loadMsAdjInfo32(suq->tex.target, 0, slot, ind, suq->tex.bindless));
>        if (mask & 0x2) {
>           int d = util_bitcount(mask & 0x1);
>           bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
> -                   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless));
> +                   loadMsAdjInfo32(suq->tex.target, 1, slot, ind, suq->tex.bindless));
>        }
>     }
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 176e0cf608..7ca38ca4bb 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1732,6 +1732,45 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
>                          prog->driver->io.suInfoBase);
>  }
>
> +inline Value *
> +NVC0LoweringPass::loadMsAdjInfo32(TexInstruction::Target target, uint32_t index, int slot, Value *ind, bool bindless)
> +{
> +   if (!bindless || targ->getChipset() < NVISA_GM107_CHIPSET)
> +      return loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(index), bindless);
> +
> +   assert(bindless);
> +
> +   Value *samples = bld.getSSA();
> +   // this shouldn't be lowered because it's being inserted before the current instruction
> +   TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
> +   tex->tex.target = target;
> +   tex->tex.query = TXQ_TYPE;
> +   tex->tex.mask = 0x4;
> +   tex->tex.r = 0xff;
> +   tex->tex.s = 0x1f;
> +   tex->tex.rIndirectSrc = 0;
> +   tex->setDef(0, samples);
> +   tex->setSrc(0, ind);
> +   tex->setSrc(1, bld.loadImm(NULL, 0));
> +   bld.insert(tex);
> +
> +   // doesn't work with sample counts other than 1/2/4/8 but they aren't supported
> +   switch (index) {
> +   case 0: {
> +      Value *tmp = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), samples, bld.mkImm(2));
> +      return bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(2));
> +   }
> +   case 1: {
> +      Value *tmp = bld.mkCmp(OP_SET, CC_GT, TYPE_U32, bld.getSSA(), TYPE_U32, samples, bld.mkImm(2))->getDef(0);
> +      return bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(1));

I'd prefer OP_NEG here (with TYPE_S32) instead of the OP_AND. The
integer SET result should be 0 or -1, so negating it also yields 0 or 1,
and the neg modifier can then perhaps get embedded into a use.

Alternatively, make the OP_SET write a predicate (TYPE_U8 +
FILE_PREDICATE) and then OP_CVT the predicate, which will come out as
a 1, IIRC. However, I think I like the OP_NEG approach better --
predicates are apparently slow, and this value is much more likely to
be used in arithmetic (which makes embedding the NEG possible).

Otherwise this is

Reviewed-by: Ilia Mirkin <imirkin at alum.mit.edu>

> +   }
> +   default: {
> +      assert(false);
> +      return NULL;
> +   }
> +   }
> +}
> +
>  static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
>  {
>     switch (su->tex.target.getEnum()) {
> @@ -1817,8 +1856,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
>     Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
>     Value *ind = tex->getIndirectR();
>
> -   Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
> -   Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
> +   Value *ms_x = loadMsAdjInfo32(tex->tex.target, 0, slot, ind, tex->tex.bindless);
> +   Value *ms_y = loadMsAdjInfo32(tex->tex.target, 1, slot, ind, tex->tex.bindless);
>
>     bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
>     bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> index 5dbb3e4f00..4136b1ecfe 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> @@ -148,7 +148,7 @@ protected:
>     void handlePIXLD(Instruction *);
>
>     void checkPredicate(Instruction *);
> -   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
> +   Value *loadMsAdjInfo32(TexInstruction::Target targ, uint32_t index, int slot, Value *ind, bool bindless);
>
>     virtual bool visit(Instruction *);
>
> @@ -161,6 +161,7 @@ private:
>     Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
>     Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
>     Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
> +   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
>     Value *loadBufInfo64(Value *ptr, uint32_t off);
>     Value *loadBufLength32(Value *ptr, uint32_t off);
>     Value *loadUboInfo64(Value *ptr, uint32_t off);
> --
> 2.17.1
>