[Mesa-dev] [PATCH] nvc0: fix bindless multisampled images on Maxwell+

Rhys Perry pendingchaos02 at gmail.com
Fri Jul 20 14:56:21 UTC 2018


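NVC0_CB_AUX_BINDLESS_INFO isn't written to on Maxwell+, and it's too small
anyway: it only has room for 512 entries, while Maxwell+ image handles are
TIC ids, of which there can be 2048.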

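This adds a new array, NVC0_CB_AUX_SU_MS_INFO, which aliases
NVC0_CB_AUX_BINDLESS_INFO (it starts at the same offset in the aux constant
buffer) and provides the ms_x/ms_y values of each image, indexed by TIC id.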

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h       |  1 +
 .../nouveau/codegen/nv50_ir_lowering_gm107.cpp     |  4 +-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 49 +++++++++++++++++++++-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h        | 32 +-------------
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  5 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |  2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c        | 28 +++++++++++++
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c    | 26 +++++++++++-
 8 files changed, 110 insertions(+), 37 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 7c835ceab8..0045ef729d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -188,6 +188,7 @@ struct nv50_ir_prog_info
       uint8_t msInfoCBSlot;      /* cX[] used for multisample info */
       uint16_t msInfoBase;       /* base address for multisample info */
       uint16_t uboInfoBase;      /* base address for compute UBOs (gk104+) */
+      uint16_t suMsInfoBase;     /* base address for surface multisample info (gm107+) */
    } io;
 
    /* driver callback to assign input/output locations */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index c7436e2e29..d8eeaf4d09 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq)
 
       if (mask & 0x1)
          bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
-                   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless));
+                   loadSuMsInfo32(handle, 0));
       if (mask & 0x2) {
          int d = util_bitcount(mask & 0x1);
          bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
-                   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless));
+                   loadSuMsInfo32(handle, 1));
       }
    }
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 1410cf26c8..e782b5bef9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1712,6 +1712,35 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
       mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
 }
 
+/* On nvc0, surface info is obtained via the surface binding points passed
+ * to the SULD/SUST instructions.
+ * On nve4, surface info is stored in c[] and is used by various special
+ * instructions, e.g. for clamping coordinates or generating an address.
+ * They couldn't just have added an equivalent to TIC now, couldn't they ?
+ */
+#define NVC0_SU_INFO_ADDR   0x00
+#define NVC0_SU_INFO_FMT    0x04
+#define NVC0_SU_INFO_DIM_X  0x08
+#define NVC0_SU_INFO_PITCH  0x0c
+#define NVC0_SU_INFO_DIM_Y  0x10
+#define NVC0_SU_INFO_ARRAY  0x14
+#define NVC0_SU_INFO_DIM_Z  0x18
+#define NVC0_SU_INFO_UNK1C  0x1c
+#define NVC0_SU_INFO_WIDTH  0x20
+#define NVC0_SU_INFO_HEIGHT 0x24
+#define NVC0_SU_INFO_DEPTH  0x28
+#define NVC0_SU_INFO_TARGET 0x2c
+#define NVC0_SU_INFO_BSIZE  0x30
+#define NVC0_SU_INFO_RAW_X  0x34
+#define NVC0_SU_INFO_MS_X   0x38
+#define NVC0_SU_INFO_MS_Y   0x3c
+
+#define NVC0_SU_INFO__STRIDE 0x40
+
+#define NVC0_SU_INFO_DIM(i)  (0x08 + (i) * 8)
+#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
+#define NVC0_SU_INFO_MS(i)   (0x38 + (i) * 4)
+
 inline Value *
 NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless)
 {
@@ -1732,6 +1761,15 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
                         prog->driver->io.suInfoBase);
 }
 
+inline Value *
+NVC0LoweringPass::loadSuMsInfo32(Value *handle, uint32_t index)
+{
+   Value *ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), handle, bld.mkImm(2047));
+   ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(3));
+
+   return loadResInfo32(ptr, index * 4, prog->driver->io.suMsInfoBase);
+}
+
 static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
 {
    switch (su->tex.target.getEnum()) {
@@ -1817,8 +1855,15 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
    Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
    Value *ind = tex->getIndirectR();
 
-   Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
-   Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
+   Value *ms_x, *ms_y;
+   if (targ->getChipset() >= NVISA_GM107_CHIPSET) {
+      Value *handle = tex->tex.bindless ? ind : loadTexHandle(ind, slot + 32);
+      ms_x = loadSuMsInfo32(handle, 0);
+      ms_y = loadSuMsInfo32(handle, 1);
+   } else {
+      ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
+      ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
+   }
 
    bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
    bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 8724c09afd..5b3918d906 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -23,35 +23,6 @@
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_build_util.h"
 
-/* On nvc0, surface info is obtained via the surface binding points passed
- * to the SULD/SUST instructions.
- * On nve4, surface info is stored in c[] and is used by various special
- * instructions, e.g. for clamping coordinates or generating an address.
- * They couldn't just have added an equivalent to TIC now, couldn't they ?
- */
-#define NVC0_SU_INFO_ADDR   0x00
-#define NVC0_SU_INFO_FMT    0x04
-#define NVC0_SU_INFO_DIM_X  0x08
-#define NVC0_SU_INFO_PITCH  0x0c
-#define NVC0_SU_INFO_DIM_Y  0x10
-#define NVC0_SU_INFO_ARRAY  0x14
-#define NVC0_SU_INFO_DIM_Z  0x18
-#define NVC0_SU_INFO_UNK1C  0x1c
-#define NVC0_SU_INFO_WIDTH  0x20
-#define NVC0_SU_INFO_HEIGHT 0x24
-#define NVC0_SU_INFO_DEPTH  0x28
-#define NVC0_SU_INFO_TARGET 0x2c
-#define NVC0_SU_INFO_BSIZE  0x30
-#define NVC0_SU_INFO_RAW_X  0x34
-#define NVC0_SU_INFO_MS_X   0x38
-#define NVC0_SU_INFO_MS_Y   0x3c
-
-#define NVC0_SU_INFO__STRIDE 0x40
-
-#define NVC0_SU_INFO_DIM(i)  (0x08 + (i) * 8)
-#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
-#define NVC0_SU_INFO_MS(i)   (0x38 + (i) * 4)
-
 namespace nv50_ir {
 
 class NVC0LegalizeSSA : public Pass
@@ -148,7 +119,7 @@ protected:
    void handlePIXLD(Instruction *);
 
    void checkPredicate(Instruction *);
-   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
+   Value *loadSuMsInfo32(Value *handle, uint32_t index);
 
    virtual bool visit(Instruction *);
 
@@ -161,6 +132,7 @@ private:
    Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
    Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
    Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
+   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
    Value *loadBufInfo64(Value *ptr, uint32_t off);
    Value *loadBufLength32(Value *ptr, uint32_t off);
    Value *loadUboInfo64(Value *ptr, uint32_t off);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 77237a3c0a..8aecfd8f6d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -149,9 +149,12 @@
 /* 1 64-bits address and 1 32-bits sequence */
 #define NVC0_CB_AUX_MP_INFO         0x6a0
 #define NVC0_CB_AUX_MP_SIZE         3 * 4
-/* 512 64-byte blocks for bindless image handles */
+/* 512 64-byte blocks for bindless image handles (NVE4 only) */
 #define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4
 #define NVC0_CB_AUX_BINDLESS_SIZE   (NVE4_IMG_MAX_HANDLES * 16 * 4)
+/* 2048 8-byte blocks for image multisampling info (GM107+) */
+#define NVC0_CB_AUX_SU_MS_INFO(i) 0x6b0 + (i) * 2 * 4
+#define NVC0_CB_AUX_SU_MS_SIZE   (NVC0_TIC_MAX_ENTRIES * 2 * 4)
 /* 4 32-bits floats for the vertex runout, put at the end */
 #define NVC0_CB_AUX_RUNOUT_INFO     NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 57d98753f4..10e9815ccc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -607,6 +607,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
       info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
       info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0);
    }
+   if (info->target >= NVISA_GM107_CHIPSET)
+      info->io.suMsInfoBase = NVC0_CB_AUX_SU_MS_INFO(0);
 
    if (prog->type == PIPE_SHADER_COMPUTE) {
       if (info->target >= NVISA_GK104_CHIPSET) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index f40600e48a..4b4359c889 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -1258,6 +1258,17 @@ gm107_validate_surfaces(struct nvc0_context *nvc0,
    BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
    PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
    PUSH_DATA (push, tic->id);
+
+   /* upload multisampling info */
+   if (view->resource->target == PIPE_TEXTURE_2D ||
+       view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
+      struct nv50_miptree *mt = nv50_miptree(view->resource);
+
+      BEGIN_1IC0(push, NVC0_3D(CB_POS), 3);
+      PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id));
+      PUSH_DATA (push, mt->ms_x);
+      PUSH_DATA (push, mt->ms_y);
+   }
 }
 
 static inline void
@@ -1398,6 +1409,7 @@ gm107_create_image_handle(struct pipe_context *pipe,
    struct pipe_sampler_view *sview =
       gm107_create_texture_view_from_image(pipe, view);
    struct nv50_tic_entry *tic = nv50_tic_entry(sview);
+   int s;
 
    if (tic == NULL)
       goto fail;
@@ -1415,6 +1427,22 @@ gm107_create_image_handle(struct pipe_context *pipe,
 
    nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
 
+   if (view->resource->target == PIPE_TEXTURE_2D ||
+       view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
+      struct nv50_miptree *mt = nv50_miptree(view->resource);
+
+      for (s = 0; s < 6; s++) {
+         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+         PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+         PUSH_DATA (push, nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+         BEGIN_1IC0(push, NVC0_3D(CB_POS), 3);
+         PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id));
+         PUSH_DATA (push, mt->ms_x);
+         PUSH_DATA (push, mt->ms_y);
+      }
+   }
+
    return 0x100000000ULL | tic->id;
 
 fail:
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 28460f8cbe..7d436d1980 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -251,6 +251,23 @@ gm107_compute_validate_surfaces(struct nvc0_context *nvc0,
    PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
    PUSH_DATA (push, tic->id);
 
+   /* upload multisampling info */
+   if (view->resource->target == PIPE_TEXTURE_2D ||
+       view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
+      struct nv50_miptree *mt = nv50_miptree(view->resource);
+
+      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
+      PUSH_DATAh(push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id));
+      PUSH_DATA (push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id));
+      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
+      PUSH_DATA (push, 8);
+      PUSH_DATA (push, 0x1);
+      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 3);
+      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+      PUSH_DATA (push, mt->ms_x);
+      PUSH_DATA (push, mt->ms_y);
+   }
+
    BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
    PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
 }
@@ -558,6 +575,7 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
 {
    const struct nvc0_screen *screen = nvc0->screen;
    const struct nvc0_program *cp = nvc0->compprog;
+   int cb_size;
 
    nve4_cp_launch_desc_init_default(desc);
 
@@ -586,8 +604,12 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
       nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
                                  NVC0_CB_USR_INFO(5), 1 << 16);
    }
+
+   cb_size = 1 << 12;
+   if (nvc0->screen->compute->oclass >= GM107_COMPUTE_CLASS)
+      cb_size = 1 << 15; // make room for NVC0_CB_AUX_SU_MS_INFO
    nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
-                              NVC0_CB_AUX_INFO(5), 1 << 11);
+                              NVC0_CB_AUX_INFO(5), cb_size);
 }
 
 static void
@@ -625,7 +647,7 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0,
                                   NVC0_CB_USR_INFO(5), 1 << 16);
    }
    gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
-                               NVC0_CB_AUX_INFO(5), 1 << 11);
+                               NVC0_CB_AUX_INFO(5), 1 << 15);
 }
 
 static inline void *
-- 
2.14.4



More information about the mesa-dev mailing list