[Mesa-dev] [PATCH] nvc0: fix bindless multisampled images on Maxwell+

Ilia Mirkin imirkin at alum.mit.edu
Fri Jul 20 15:03:51 UTC 2018


For slightly more instruction cost, but simpler logic, you could also
retrieve the number of samples from the TIC. There's a query for that.
Not necessarily a good idea though, just a thought.

On Fri, Jul 20, 2018 at 10:56 AM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
> NVC0_CB_AUX_BINDLESS_INFO isn't written to on Maxwell+ and it's too small
> anyway.
>
> This adds a new array, NVC0_CB_AUX_SU_MS_INFO, which aliases it and
> provides the needed information.
>
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> ---
>  .../drivers/nouveau/codegen/nv50_ir_driver.h       |  1 +
>  .../nouveau/codegen/nv50_ir_lowering_gm107.cpp     |  4 +-
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 49 +++++++++++++++++++++-
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.h        | 32 +-------------
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  5 ++-
>  src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |  2 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c        | 28 +++++++++++++
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c    | 26 +++++++++++-
>  8 files changed, 110 insertions(+), 37 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 7c835ceab8..0045ef729d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -188,6 +188,7 @@ struct nv50_ir_prog_info
>        uint8_t msInfoCBSlot;      /* cX[] used for multisample info */
>        uint16_t msInfoBase;       /* base address for multisample info */
>        uint16_t uboInfoBase;      /* base address for compute UBOs (gk104+) */
> +      uint16_t suMsInfoBase;     /* base address for surface multisample info (gm107+) */
>     } io;
>
>     /* driver callback to assign input/output locations */
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> index c7436e2e29..d8eeaf4d09 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> @@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq)
>
>        if (mask & 0x1)
>           bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
> -                   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless));
> +                   loadSuMsInfo32(handle, 0));
>        if (mask & 0x2) {
>           int d = util_bitcount(mask & 0x1);
>           bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
> -                   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless));
> +                   loadSuMsInfo32(handle, 1));
>        }
>     }
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 1410cf26c8..e782b5bef9 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1712,6 +1712,35 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
>        mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
>  }
>
> +/* On nvc0, surface info is obtained via the surface binding points passed
> + * to the SULD/SUST instructions.
> + * On nve4, surface info is stored in c[] and is used by various special
> + * instructions, e.g. for clamping coordinates or generating an address.
> + * They couldn't just have added an equivalent to TIC now, couldn't they ?
> + */
> +#define NVC0_SU_INFO_ADDR   0x00
> +#define NVC0_SU_INFO_FMT    0x04
> +#define NVC0_SU_INFO_DIM_X  0x08
> +#define NVC0_SU_INFO_PITCH  0x0c
> +#define NVC0_SU_INFO_DIM_Y  0x10
> +#define NVC0_SU_INFO_ARRAY  0x14
> +#define NVC0_SU_INFO_DIM_Z  0x18
> +#define NVC0_SU_INFO_UNK1C  0x1c
> +#define NVC0_SU_INFO_WIDTH  0x20
> +#define NVC0_SU_INFO_HEIGHT 0x24
> +#define NVC0_SU_INFO_DEPTH  0x28
> +#define NVC0_SU_INFO_TARGET 0x2c
> +#define NVC0_SU_INFO_BSIZE  0x30
> +#define NVC0_SU_INFO_RAW_X  0x34
> +#define NVC0_SU_INFO_MS_X   0x38
> +#define NVC0_SU_INFO_MS_Y   0x3c
> +
> +#define NVC0_SU_INFO__STRIDE 0x40
> +
> +#define NVC0_SU_INFO_DIM(i)  (0x08 + (i) * 8)
> +#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
> +#define NVC0_SU_INFO_MS(i)   (0x38 + (i) * 4)
> +
>  inline Value *
>  NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless)
>  {
> @@ -1732,6 +1761,15 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
>                          prog->driver->io.suInfoBase);
>  }
>
> +inline Value *
> +NVC0LoweringPass::loadSuMsInfo32(Value *handle, uint32_t index)
> +{
> +   Value *ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), handle, bld.mkImm(2047));
> +   ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(3));
> +
> +   return loadResInfo32(ptr, index * 4, prog->driver->io.suMsInfoBase);
> +}
> +
>  static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
>  {
>     switch (su->tex.target.getEnum()) {
> @@ -1817,8 +1855,15 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
>     Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
>     Value *ind = tex->getIndirectR();
>
> -   Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
> -   Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
> +   Value *ms_x, *ms_y;
> +   if (targ->getChipset() >= NVISA_GM107_CHIPSET) {
> +      Value *handle = tex->tex.bindless ? ind : loadTexHandle(ind, slot + 32);
> +      ms_x = loadSuMsInfo32(handle, 0);
> +      ms_y = loadSuMsInfo32(handle, 1);
> +   } else {
> +      ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
> +      ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
> +   }
>
>     bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
>     bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> index 8724c09afd..5b3918d906 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> @@ -23,35 +23,6 @@
>  #include "codegen/nv50_ir.h"
>  #include "codegen/nv50_ir_build_util.h"
>
> -/* On nvc0, surface info is obtained via the surface binding points passed
> - * to the SULD/SUST instructions.
> - * On nve4, surface info is stored in c[] and is used by various special
> - * instructions, e.g. for clamping coordinates or generating an address.
> - * They couldn't just have added an equivalent to TIC now, couldn't they ?
> - */
> -#define NVC0_SU_INFO_ADDR   0x00
> -#define NVC0_SU_INFO_FMT    0x04
> -#define NVC0_SU_INFO_DIM_X  0x08
> -#define NVC0_SU_INFO_PITCH  0x0c
> -#define NVC0_SU_INFO_DIM_Y  0x10
> -#define NVC0_SU_INFO_ARRAY  0x14
> -#define NVC0_SU_INFO_DIM_Z  0x18
> -#define NVC0_SU_INFO_UNK1C  0x1c
> -#define NVC0_SU_INFO_WIDTH  0x20
> -#define NVC0_SU_INFO_HEIGHT 0x24
> -#define NVC0_SU_INFO_DEPTH  0x28
> -#define NVC0_SU_INFO_TARGET 0x2c
> -#define NVC0_SU_INFO_BSIZE  0x30
> -#define NVC0_SU_INFO_RAW_X  0x34
> -#define NVC0_SU_INFO_MS_X   0x38
> -#define NVC0_SU_INFO_MS_Y   0x3c
> -
> -#define NVC0_SU_INFO__STRIDE 0x40
> -
> -#define NVC0_SU_INFO_DIM(i)  (0x08 + (i) * 8)
> -#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
> -#define NVC0_SU_INFO_MS(i)   (0x38 + (i) * 4)
> -
>  namespace nv50_ir {
>
>  class NVC0LegalizeSSA : public Pass
> @@ -148,7 +119,7 @@ protected:
>     void handlePIXLD(Instruction *);
>
>     void checkPredicate(Instruction *);
> -   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
> +   Value *loadSuMsInfo32(Value *handle, uint32_t index);
>
>     virtual bool visit(Instruction *);
>
> @@ -161,6 +132,7 @@ private:
>     Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
>     Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
>     Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
> +   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
>     Value *loadBufInfo64(Value *ptr, uint32_t off);
>     Value *loadBufLength32(Value *ptr, uint32_t off);
>     Value *loadUboInfo64(Value *ptr, uint32_t off);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 77237a3c0a..8aecfd8f6d 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -149,9 +149,12 @@
>  /* 1 64-bits address and 1 32-bits sequence */
>  #define NVC0_CB_AUX_MP_INFO         0x6a0
>  #define NVC0_CB_AUX_MP_SIZE         3 * 4
> -/* 512 64-byte blocks for bindless image handles */
> +/* 512 64-byte blocks for bindless image handles (NVE4 only) */
>  #define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4
>  #define NVC0_CB_AUX_BINDLESS_SIZE   (NVE4_IMG_MAX_HANDLES * 16 * 4)
> +/* 2048 8-byte blocks for image multisampling info (GM107+) */
> +#define NVC0_CB_AUX_SU_MS_INFO(i) 0x6b0 + (i) * 2 * 4
> +#define NVC0_CB_AUX_SU_MS_SIZE   (NVC0_TIC_MAX_ENTRIES * 2 * 4)
>  /* 4 32-bits floats for the vertex runout, put at the end */
>  #define NVC0_CB_AUX_RUNOUT_INFO     NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> index 57d98753f4..10e9815ccc 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> @@ -607,6 +607,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
>        info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
>        info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0);
>     }
> +   if (info->target >= NVISA_GM107_CHIPSET)
> +      info->io.suMsInfoBase = NVC0_CB_AUX_SU_MS_INFO(0);
>
>     if (prog->type == PIPE_SHADER_COMPUTE) {
>        if (info->target >= NVISA_GK104_CHIPSET) {
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> index f40600e48a..4b4359c889 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> @@ -1258,6 +1258,17 @@ gm107_validate_surfaces(struct nvc0_context *nvc0,
>     BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
>     PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
>     PUSH_DATA (push, tic->id);
> +
> +   /* upload multisampling info */
> +   if (view->resource->target == PIPE_TEXTURE_2D ||
> +       view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
> +      struct nv50_miptree *mt = nv50_miptree(view->resource);
> +
> +      BEGIN_1IC0(push, NVC0_3D(CB_POS), 3);
> +      PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id));
> +      PUSH_DATA (push, mt->ms_x);
> +      PUSH_DATA (push, mt->ms_y);
> +   }
>  }
>
>  static inline void
> @@ -1398,6 +1409,7 @@ gm107_create_image_handle(struct pipe_context *pipe,
>     struct pipe_sampler_view *sview =
>        gm107_create_texture_view_from_image(pipe, view);
>     struct nv50_tic_entry *tic = nv50_tic_entry(sview);
> +   int s;
>
>     if (tic == NULL)
>        goto fail;
> @@ -1415,6 +1427,22 @@ gm107_create_image_handle(struct pipe_context *pipe,
>
>     nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
>
> +   if (view->resource->target == PIPE_TEXTURE_2D ||
> +       view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
> +      struct nv50_miptree *mt = nv50_miptree(view->resource);
> +
> +      for (s = 0; s < 6; s++) {
> +         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> +         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
> +         PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
> +         PUSH_DATA (push, nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
> +         BEGIN_1IC0(push, NVC0_3D(CB_POS), 3);
> +         PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id));
> +         PUSH_DATA (push, mt->ms_x);
> +         PUSH_DATA (push, mt->ms_y);
> +      }
> +   }
> +
>     return 0x100000000ULL | tic->id;
>
>  fail:
> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> index 28460f8cbe..7d436d1980 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> @@ -251,6 +251,23 @@ gm107_compute_validate_surfaces(struct nvc0_context *nvc0,
>     PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
>     PUSH_DATA (push, tic->id);
>
> +   /* upload multisampling info */
> +   if (view->resource->target == PIPE_TEXTURE_2D ||
> +       view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
> +      struct nv50_miptree *mt = nv50_miptree(view->resource);
> +
> +      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> +      PUSH_DATAh(push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id));
> +      PUSH_DATA (push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id));
> +      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
> +      PUSH_DATA (push, 8);
> +      PUSH_DATA (push, 0x1);
> +      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 3);
> +      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
> +      PUSH_DATA (push, mt->ms_x);
> +      PUSH_DATA (push, mt->ms_y);
> +   }
> +
>     BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
>     PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
>  }
> @@ -558,6 +575,7 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
>  {
>     const struct nvc0_screen *screen = nvc0->screen;
>     const struct nvc0_program *cp = nvc0->compprog;
> +   int cb_size;
>
>     nve4_cp_launch_desc_init_default(desc);
>
> @@ -586,8 +604,12 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
>        nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
>                                   NVC0_CB_USR_INFO(5), 1 << 16);
>     }
> +
> +   cb_size = 1 << 12;
> +   if (nvc0->screen->compute->oclass >= GM107_COMPUTE_CLASS)
> +      cb_size = 1 << 15; // make room for NVC0_CB_AUX_SU_MS_INFO
>     nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
> -                              NVC0_CB_AUX_INFO(5), 1 << 11);
> +                              NVC0_CB_AUX_INFO(5), cb_size);
>  }
>
>  static void
> @@ -625,7 +647,7 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0,
>                                    NVC0_CB_USR_INFO(5), 1 << 16);
>     }
>     gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
> -                               NVC0_CB_AUX_INFO(5), 1 << 11);
> +                               NVC0_CB_AUX_INFO(5), 1 << 15);
>  }
>
>  static inline void *
> --
> 2.14.4
>


More information about the mesa-dev mailing list