[Mesa-dev] [PATCH] nvc0: fix bindless multisampled images on Maxwell+
Ilia Mirkin
imirkin at alum.mit.edu
Fri Jul 20 15:03:51 UTC 2018
At slightly higher instruction cost, but with simpler logic, you could also
retrieve the number of samples from the TIC; there is a query for that.
It's not necessarily a good idea, though -- just a thought.
On Fri, Jul 20, 2018 at 10:56 AM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
> NVC0_CB_AUX_BINDLESS_INFO isn't written to on Maxwell+ and it's too small
> anyway.
>
> This adds a new array, NVC0_CB_AUX_SU_MS_INFO, which aliases it and
> provides the needed information.
>
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> ---
> .../drivers/nouveau/codegen/nv50_ir_driver.h | 1 +
> .../nouveau/codegen/nv50_ir_lowering_gm107.cpp | 4 +-
> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 49 +++++++++++++++++++++-
> .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 32 +-------------
> src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 5 ++-
> src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 +
> src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 28 +++++++++++++
> src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 26 +++++++++++-
> 8 files changed, 110 insertions(+), 37 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 7c835ceab8..0045ef729d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -188,6 +188,7 @@ struct nv50_ir_prog_info
> uint8_t msInfoCBSlot; /* cX[] used for multisample info */
> uint16_t msInfoBase; /* base address for multisample info */
> uint16_t uboInfoBase; /* base address for compute UBOs (gk104+) */
> + uint16_t suMsInfoBase; /* base address for surface multisample info (gm107+) */
> } io;
>
> /* driver callback to assign input/output locations */
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> index c7436e2e29..d8eeaf4d09 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> @@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq)
>
> if (mask & 0x1)
> bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
> - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless));
> + loadSuMsInfo32(handle, 0));
> if (mask & 0x2) {
> int d = util_bitcount(mask & 0x1);
> bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
> - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless));
> + loadSuMsInfo32(handle, 1));
> }
> }
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 1410cf26c8..e782b5bef9 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1712,6 +1712,35 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
> mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
> }
>
> +/* On nvc0, surface info is obtained via the surface binding points passed
> + * to the SULD/SUST instructions.
> + * On nve4, surface info is stored in c[] and is used by various special
> + * instructions, e.g. for clamping coordinates or generating an address.
> + * They couldn't just have added an equivalent to TIC now, couldn't they ?
> + */
> +#define NVC0_SU_INFO_ADDR 0x00
> +#define NVC0_SU_INFO_FMT 0x04
> +#define NVC0_SU_INFO_DIM_X 0x08
> +#define NVC0_SU_INFO_PITCH 0x0c
> +#define NVC0_SU_INFO_DIM_Y 0x10
> +#define NVC0_SU_INFO_ARRAY 0x14
> +#define NVC0_SU_INFO_DIM_Z 0x18
> +#define NVC0_SU_INFO_UNK1C 0x1c
> +#define NVC0_SU_INFO_WIDTH 0x20
> +#define NVC0_SU_INFO_HEIGHT 0x24
> +#define NVC0_SU_INFO_DEPTH 0x28
> +#define NVC0_SU_INFO_TARGET 0x2c
> +#define NVC0_SU_INFO_BSIZE 0x30
> +#define NVC0_SU_INFO_RAW_X 0x34
> +#define NVC0_SU_INFO_MS_X 0x38
> +#define NVC0_SU_INFO_MS_Y 0x3c
> +
> +#define NVC0_SU_INFO__STRIDE 0x40
> +
> +#define NVC0_SU_INFO_DIM(i) (0x08 + (i) * 8)
> +#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
> +#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4)
> +
> inline Value *
> NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless)
> {
> @@ -1732,6 +1761,15 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
> prog->driver->io.suInfoBase);
> }
>
> +inline Value *
> +NVC0LoweringPass::loadSuMsInfo32(Value *handle, uint32_t index)
> +{
> + Value *ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), handle, bld.mkImm(2047));
> + ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(3));
> +
> + return loadResInfo32(ptr, index * 4, prog->driver->io.suMsInfoBase);
> +}
> +
> static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
> {
> switch (su->tex.target.getEnum()) {
> @@ -1817,8 +1855,15 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
> Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
> Value *ind = tex->getIndirectR();
>
> - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
> - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
> + Value *ms_x, *ms_y;
> + if (targ->getChipset() >= NVISA_GM107_CHIPSET) {
> + Value *handle = tex->tex.bindless ? ind : loadTexHandle(ind, slot + 32);
> + ms_x = loadSuMsInfo32(handle, 0);
> + ms_y = loadSuMsInfo32(handle, 1);
> + } else {
> + ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
> + ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
> + }
>
> bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
> bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> index 8724c09afd..5b3918d906 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> @@ -23,35 +23,6 @@
> #include "codegen/nv50_ir.h"
> #include "codegen/nv50_ir_build_util.h"
>
> -/* On nvc0, surface info is obtained via the surface binding points passed
> - * to the SULD/SUST instructions.
> - * On nve4, surface info is stored in c[] and is used by various special
> - * instructions, e.g. for clamping coordinates or generating an address.
> - * They couldn't just have added an equivalent to TIC now, couldn't they ?
> - */
> -#define NVC0_SU_INFO_ADDR 0x00
> -#define NVC0_SU_INFO_FMT 0x04
> -#define NVC0_SU_INFO_DIM_X 0x08
> -#define NVC0_SU_INFO_PITCH 0x0c
> -#define NVC0_SU_INFO_DIM_Y 0x10
> -#define NVC0_SU_INFO_ARRAY 0x14
> -#define NVC0_SU_INFO_DIM_Z 0x18
> -#define NVC0_SU_INFO_UNK1C 0x1c
> -#define NVC0_SU_INFO_WIDTH 0x20
> -#define NVC0_SU_INFO_HEIGHT 0x24
> -#define NVC0_SU_INFO_DEPTH 0x28
> -#define NVC0_SU_INFO_TARGET 0x2c
> -#define NVC0_SU_INFO_BSIZE 0x30
> -#define NVC0_SU_INFO_RAW_X 0x34
> -#define NVC0_SU_INFO_MS_X 0x38
> -#define NVC0_SU_INFO_MS_Y 0x3c
> -
> -#define NVC0_SU_INFO__STRIDE 0x40
> -
> -#define NVC0_SU_INFO_DIM(i) (0x08 + (i) * 8)
> -#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
> -#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4)
> -
> namespace nv50_ir {
>
> class NVC0LegalizeSSA : public Pass
> @@ -148,7 +119,7 @@ protected:
> void handlePIXLD(Instruction *);
>
> void checkPredicate(Instruction *);
> - Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
> + Value *loadSuMsInfo32(Value *handle, uint32_t index);
>
> virtual bool visit(Instruction *);
>
> @@ -161,6 +132,7 @@ private:
> Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
> Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
> Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
> + Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
> Value *loadBufInfo64(Value *ptr, uint32_t off);
> Value *loadBufLength32(Value *ptr, uint32_t off);
> Value *loadUboInfo64(Value *ptr, uint32_t off);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 77237a3c0a..8aecfd8f6d 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -149,9 +149,12 @@
> /* 1 64-bits address and 1 32-bits sequence */
> #define NVC0_CB_AUX_MP_INFO 0x6a0
> #define NVC0_CB_AUX_MP_SIZE 3 * 4
> -/* 512 64-byte blocks for bindless image handles */
> +/* 512 64-byte blocks for bindless image handles (NVE4 only) */
> #define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4
> #define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4)
> +/* 2048 8-byte blocks for image multisampling info (GM107+) */
> +#define NVC0_CB_AUX_SU_MS_INFO(i) 0x6b0 + (i) * 2 * 4
> +#define NVC0_CB_AUX_SU_MS_SIZE (NVC0_TIC_MAX_ENTRIES * 2 * 4)
> /* 4 32-bits floats for the vertex runout, put at the end */
> #define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> index 57d98753f4..10e9815ccc 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> @@ -607,6 +607,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
> info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
> info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0);
> }
> + if (info->target >= NVISA_GM107_CHIPSET)
> + info->io.suMsInfoBase = NVC0_CB_AUX_SU_MS_INFO(0);
>
> if (prog->type == PIPE_SHADER_COMPUTE) {
> if (info->target >= NVISA_GK104_CHIPSET) {
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> index f40600e48a..4b4359c889 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> @@ -1258,6 +1258,17 @@ gm107_validate_surfaces(struct nvc0_context *nvc0,
> BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
> PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
> PUSH_DATA (push, tic->id);
> +
> + /* upload multisampling info */
> + if (view->resource->target == PIPE_TEXTURE_2D ||
> + view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
> + struct nv50_miptree *mt = nv50_miptree(view->resource);
> +
> + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3);
> + PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id));
> + PUSH_DATA (push, mt->ms_x);
> + PUSH_DATA (push, mt->ms_y);
> + }
> }
>
> static inline void
> @@ -1398,6 +1409,7 @@ gm107_create_image_handle(struct pipe_context *pipe,
> struct pipe_sampler_view *sview =
> gm107_create_texture_view_from_image(pipe, view);
> struct nv50_tic_entry *tic = nv50_tic_entry(sview);
> + int s;
>
> if (tic == NULL)
> goto fail;
> @@ -1415,6 +1427,22 @@ gm107_create_image_handle(struct pipe_context *pipe,
>
> nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
>
> + if (view->resource->target == PIPE_TEXTURE_2D ||
> + view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
> + struct nv50_miptree *mt = nv50_miptree(view->resource);
> +
> + for (s = 0; s < 6; s++) {
> + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> + PUSH_DATA (push, NVC0_CB_AUX_SIZE);
> + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
> + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
> + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3);
> + PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id));
> + PUSH_DATA (push, mt->ms_x);
> + PUSH_DATA (push, mt->ms_y);
> + }
> + }
> +
> return 0x100000000ULL | tic->id;
>
> fail:
> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> index 28460f8cbe..7d436d1980 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> @@ -251,6 +251,23 @@ gm107_compute_validate_surfaces(struct nvc0_context *nvc0,
> PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
> PUSH_DATA (push, tic->id);
>
> + /* upload multisampling info */
> + if (view->resource->target == PIPE_TEXTURE_2D ||
> + view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
> + struct nv50_miptree *mt = nv50_miptree(view->resource);
> +
> + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> + PUSH_DATAh(push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id));
> + PUSH_DATA (push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id));
> + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
> + PUSH_DATA (push, 8);
> + PUSH_DATA (push, 0x1);
> + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 3);
> + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
> + PUSH_DATA (push, mt->ms_x);
> + PUSH_DATA (push, mt->ms_y);
> + }
> +
> BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
> PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
> }
> @@ -558,6 +575,7 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
> {
> const struct nvc0_screen *screen = nvc0->screen;
> const struct nvc0_program *cp = nvc0->compprog;
> + int cb_size;
>
> nve4_cp_launch_desc_init_default(desc);
>
> @@ -586,8 +604,12 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
> nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
> NVC0_CB_USR_INFO(5), 1 << 16);
> }
> +
> + cb_size = 1 << 12;
> + if (nvc0->screen->compute->oclass >= GM107_COMPUTE_CLASS)
> + cb_size = 1 << 15; // make room for NVC0_CB_AUX_SU_MS_INFO
> nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
> - NVC0_CB_AUX_INFO(5), 1 << 11);
> + NVC0_CB_AUX_INFO(5), cb_size);
> }
>
> static void
> @@ -625,7 +647,7 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0,
> NVC0_CB_USR_INFO(5), 1 << 16);
> }
> gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
> - NVC0_CB_AUX_INFO(5), 1 << 11);
> + NVC0_CB_AUX_INFO(5), 1 << 15);
> }
>
> static inline void *
> --
> 2.14.4
>
More information about the mesa-dev mailing list