[Mesa-dev] [PATCH v2] nvc0: fix bindless multisampled images on Maxwell+

Rhys Perry pendingchaos02 at gmail.com
Mon Sep 17 15:00:25 UTC 2018


NVC0_CB_AUX_BINDLESS_INFO isn't written to on Maxwell+ and it's too small
anyway.

With these changes, TXQ is used to determine the number of samples and
the coordinate adjustment information is looked up in a small array in
the driver constant buffer.

v2: rework to use TXQ and a small array instead of a larger array with an
    entry for each texture

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h  |  1 +
 .../codegen/nv50_ir_lowering_gm107.cpp        |  4 +--
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 31 +++++++++++++++++--
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h   |  3 +-
 .../nouveau/codegen/nv50_ir_peephole.cpp      |  1 +
 .../drivers/nouveau/nvc0/mme/com9097.mme      |  8 ++---
 .../drivers/nouveau/nvc0/mme/com9097.mme.h    |  8 ++---
 .../drivers/nouveau/nvc0/nvc0_context.h       | 23 ++++++++------
 .../drivers/nouveau/nvc0/nvc0_program.c       |  1 +
 .../drivers/nouveau/nvc0/nvc0_screen.c        | 15 +++++++++
 .../drivers/nouveau/nvc0/nve4_compute.c       | 22 +++++++++++++
 11 files changed, 94 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 7c835ceab8..b3da6fc3cf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -188,6 +188,7 @@ struct nv50_ir_prog_info
       uint8_t msInfoCBSlot;      /* cX[] used for multisample info */
       uint16_t msInfoBase;       /* base address for multisample info */
       uint16_t uboInfoBase;      /* base address for compute UBOs (gk104+) */
+      uint16_t msAdjInfoBase;    /* base address for MS coordinate adjustment info */
    } io;
 
    /* driver callback to assign input/output locations */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index c7436e2e29..49a5f3b01f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq)
 
       if (mask & 0x1)
          bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
-                   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless));
+                   loadMsAdjInfo32(suq->tex.target, 0, slot, ind, suq->tex.bindless));
       if (mask & 0x2) {
          int d = util_bitcount(mask & 0x1);
          bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
-                   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless));
+                   loadMsAdjInfo32(suq->tex.target, 1, slot, ind, suq->tex.bindless));
       }
    }
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 176e0cf608..5db29ba799 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1732,6 +1732,33 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
                         prog->driver->io.suInfoBase);
 }
 
+inline Value *
+NVC0LoweringPass::loadMsAdjInfo32(TexInstruction::Target target, uint32_t index, int slot, Value *ind, bool bindless)
+{
+   if (!bindless || targ->getChipset() < NVISA_GM107_CHIPSET)
+      return loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(index), bindless);
+
+   assert(bindless);
+
+   Value *samples = bld.getSSA();
+   // This shouldn't be lowered because it's being inserted before the current instruction
+   TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
+   tex->tex.target = target;
+   tex->tex.query = TXQ_TYPE;
+   tex->tex.mask = 0x4;
+   tex->tex.r = 0xff;
+   tex->tex.s = 0x1f;
+   tex->tex.rIndirectSrc = 0;
+   tex->setDef(0, samples);
+   tex->setSrc(0, ind);
+   tex->setSrc(1, bld.loadImm(NULL, 0));
+   bld.insert(tex);
+
+   // XMAD has a higher throughput than SHL and we shouldn't be dealing with >65535 integers here
+   Value *ptr = bld.mkOp3v(OP_XMAD, TYPE_U32, bld.getSSA(), samples, bld.mkImm(8), bld.mkImm(0));
+   return loadResInfo32(ptr, index * 4, prog->driver->io.msAdjInfoBase);
+}
+
 static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
 {
    switch (su->tex.target.getEnum()) {
@@ -1817,8 +1844,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
    Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
    Value *ind = tex->getIndirectR();
 
-   Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
-   Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
+   Value *ms_x = loadMsAdjInfo32(tex->tex.target, 0, slot, ind, tex->tex.bindless);
+   Value *ms_y = loadMsAdjInfo32(tex->tex.target, 1, slot, ind, tex->tex.bindless);
 
    bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
    bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 5dbb3e4f00..4136b1ecfe 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -148,7 +148,7 @@ protected:
    void handlePIXLD(Instruction *);
 
    void checkPredicate(Instruction *);
-   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
+   Value *loadMsAdjInfo32(TexInstruction::Target targ, uint32_t index, int slot, Value *ind, bool bindless);
 
    virtual bool visit(Instruction *);
 
@@ -161,6 +161,7 @@ private:
    Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
    Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
    Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
+   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
    Value *loadBufInfo64(Value *ptr, uint32_t off);
    Value *loadBufLength32(Value *ptr, uint32_t off);
    Value *loadUboInfo64(Value *ptr, uint32_t off);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index d851cf3c37..f91c502e9e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -317,6 +317,7 @@ IndirectPropagation::visit(BasicBlock *bb)
          ImmediateValue imm;
          if (!i->src(s).isIndirect(0))
             continue;
+
          insn = i->getIndirect(s, 0)->getInsn();
          if (!insn)
             continue;
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index 38c2e86843..8ca8f34f9b 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -255,7 +255,7 @@ dei_draw_again:
    parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
    parm $r4 send $r4 /* index_bias, send start */
    maddr 0x18e3 /* CB_POS */
-   send 0x1a0 /* 256 + 160 */
+   send 0x1e0 /* 256 + 224 */
    braz $r2 #dei_end
    parm $r5 send $r4 /* start_instance, send index_bias */
    send $r5 /* send start_instance */
@@ -311,7 +311,7 @@ dai_draw_again:
    braz $r3 #dai_end
    parm $r4 send $r4 /* start_instance */
    maddr 0x18e3 /* CB_POS */
-   send 0x1a0 /* 256 + 160 */
+   send 0x1e0 /* 256 + 224 */
    send 0x0 /* send 0 as base_vertex */
    send $r4 /* send start_instance */
    send $r6 /* draw id */
@@ -374,7 +374,7 @@ deic_draw_again:
    parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
    parm $r4 send $r4 /* index_bias, send start */
    maddr 0x18e3 /* CB_POS */
-   send 0x1a0 /* 256 + 160 */
+   send 0x1e0 /* 256 + 224 */
    braz $r2 #deic_end
    parm $r5 send $r4 /* start_instance, send index_bias */
    send $r5 /* send start_instance */
@@ -455,7 +455,7 @@ daic_draw_again:
    braz $r3 #daic_end
    parm $r4 send $r4 /* start_instance */
    maddr 0x18e3 /* CB_POS */
-   send 0x1a0 /* 256 + 160 */
+   send 0x1e0 /* 256 + 224 */
    send 0x0 /* send 0 as base_vertex */
    send $r4 /* send start_instance */
    send $r6 /* draw id */
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
index 49c0891114..47c5e6c6e0 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
@@ -140,7 +140,7 @@ uint32_t mme9097_draw_elts_indirect[] = {
 	0x017dc451,
 	0x00002431,
 	0x0638c021,
-	0x00680041,
+	0x00780041,
 	0x0004d007,
 	0x00002531,
 	0x00002841,
@@ -185,7 +185,7 @@ uint32_t mme9097_draw_arrays_indirect[] = {
 	0x0004d807,
 	0x00002431,
 	0x0638c021,
-	0x00680041,
+	0x00780041,
 	0x00000041,
 	0x00002041,
 	0x00003041,
@@ -233,7 +233,7 @@ uint32_t mme9097_draw_elts_indirect_count[] = {
 	0x017dc451,
 	0x00002431,
 	0x0638c021,
-	0x00680041,
+	0x00780041,
 	0x0004d007,
 	0x00002531,
 	0x00002841,
@@ -300,7 +300,7 @@ uint32_t mme9097_draw_arrays_indirect_count[] = {
 	0x0004d807,
 	0x00002431,
 	0x0638c021,
-	0x00680041,
+	0x00780041,
 	0x00000041,
 	0x00002041,
 	0x00003041,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 77237a3c0a..1d920c26f5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -122,35 +122,38 @@
 /* 8 sets of 32-bits coordinate offsets */
 #define NVC0_CB_AUX_MS_INFO         0x0c0
 #define NVC0_CB_AUX_MS_SIZE         (8 * 2 * 4)
+/* 8 sets of 32-bit pairs containing coordinate adjustment information */
+#define NVC0_CB_AUX_MS_ADJ_INFO(i)  0x100 + (i) * 4 * 2
+#define NVC0_CB_AUX_MS_ADJ_SIZE     (8 * 2 * 4)
 /* block/grid size, at 3 32-bits integers each, gridid and work_dim */
-#define NVC0_CB_AUX_GRID_INFO(i)    0x100 + (i) * 4 /* CP */
+#define NVC0_CB_AUX_GRID_INFO(i)    0x140 + (i) * 4 /* CP */
 #define NVC0_CB_AUX_GRID_SIZE       (8 * 4)
 /* FB texture handle */
-#define NVC0_CB_AUX_FB_TEX_INFO     0x100 /* FP */
+#define NVC0_CB_AUX_FB_TEX_INFO     0x140 /* FP */
 #define NVC0_CB_AUX_FB_TEX_SIZE     (4)
 /* 8 user clip planes, at 4 32-bits floats each */
-#define NVC0_CB_AUX_UCP_INFO        0x120
+#define NVC0_CB_AUX_UCP_INFO        0x160
 #define NVC0_CB_AUX_UCP_SIZE        (PIPE_MAX_CLIP_PLANES * 4 * 4)
 /* 13 ubos, at 4 32-bits integer each */
-#define NVC0_CB_AUX_UBO_INFO(i)     0x120 + (i) * 4 * 4 /* CP */
+#define NVC0_CB_AUX_UBO_INFO(i)     0x160 + (i) * 4 * 4 /* CP */
 #define NVC0_CB_AUX_UBO_SIZE        ((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4)
 /* 8 sets of 32-bits integer pairs sample offsets */
-#define NVC0_CB_AUX_SAMPLE_INFO     0x1a0 /* FP */
+#define NVC0_CB_AUX_SAMPLE_INFO     0x1e0 /* FP */
 /* 256 bytes, though only 64 bytes used before GM200 */
 #define NVC0_CB_AUX_SAMPLE_SIZE     (8 * 2 * 4 * 4)
 /* draw parameters (index bais, base instance, drawid) */
-#define NVC0_CB_AUX_DRAW_INFO       0x1a0 /* VP */
+#define NVC0_CB_AUX_DRAW_INFO       0x1e0 /* VP */
 /* 32 user buffers, at 4 32-bits integers each */
-#define NVC0_CB_AUX_BUF_INFO(i)     0x2a0 + (i) * 4 * 4
+#define NVC0_CB_AUX_BUF_INFO(i)     0x2e0 + (i) * 4 * 4
 #define NVC0_CB_AUX_BUF_SIZE        (NVC0_MAX_BUFFERS * 4 * 4)
 /* 8 surfaces, at 16 32-bits integers each */
-#define NVC0_CB_AUX_SU_INFO(i)      0x4a0 + (i) * 16 * 4
+#define NVC0_CB_AUX_SU_INFO(i)      0x4e0 + (i) * 16 * 4
 #define NVC0_CB_AUX_SU_SIZE         (NVC0_MAX_IMAGES * 16 * 4)
 /* 1 64-bits address and 1 32-bits sequence */
-#define NVC0_CB_AUX_MP_INFO         0x6a0
+#define NVC0_CB_AUX_MP_INFO         0x6e0
 #define NVC0_CB_AUX_MP_SIZE         3 * 4
 /* 512 64-byte blocks for bindless image handles */
-#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4
+#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6f0 + (i) * 16 * 4
 #define NVC0_CB_AUX_BINDLESS_SIZE   (NVE4_IMG_MAX_HANDLES * 16 * 4)
 /* 4 32-bits floats for the vertex runout, put at the end */
 #define NVC0_CB_AUX_RUNOUT_INFO     NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 57d98753f4..b3a0954d76 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -600,6 +600,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
    info->io.ucpBase = NVC0_CB_AUX_UCP_INFO;
    info->io.drawInfoBase = NVC0_CB_AUX_DRAW_INFO;
    info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
+   info->io.msAdjInfoBase = NVC0_CB_AUX_MS_ADJ_INFO(0);
    info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
    info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
    if (info->target >= NVISA_GK104_CHIPSET) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 2eecf59ce0..f67e42052e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -1362,6 +1362,21 @@ nvc0_screen_create(struct nouveau_device *dev)
       PUSH_DATA (push, 1);
       PUSH_DATA (push, 3); /* 7 */
       PUSH_DATA (push, 1);
+
+      /* MS coordinate adjustment information */
+      for (int i = 1; i <= 8; i *= 2) {
+         BEGIN_1IC0(push, NVC0_3D(CB_POS), 3);
+         PUSH_DATA (push, NVC0_CB_AUX_MS_ADJ_INFO(i));
+         int ms_x = 0, ms_y = 0;
+         switch (i) {
+         case 1: break;
+         case 2: ms_x = 1; break;
+         case 4: ms_x = 1; ms_y = 1; break;
+         case 8: ms_x = 2; ms_y = 1; break;
+         }
+         PUSH_DATA(push, ms_x);
+         PUSH_DATA(push, ms_y);
+      }
    }
    BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
    PUSH_DATA (push, 0);
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 8aa8d4936f..b7af7ab0d2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -168,6 +168,28 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
    PUSH_DATA (push, 3); /* 7 */
    PUSH_DATA (push, 1);
 
+   /* MS coordinate adjustment information */
+   for (int i = 1; i <= 8; i *= 2) {
+      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 4);
+      PUSH_DATA (push, 8);
+      PUSH_DATA (push, 1);
+      PUSH_DATAh(push, address + NVC0_CB_AUX_MS_ADJ_INFO(i));
+      PUSH_DATA (push, address + NVC0_CB_AUX_MS_ADJ_INFO(i));
+      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 3);
+      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+
+      int ms_x = 0, ms_y = 0;
+      switch (i) {
+      case 1: break;
+      case 2: ms_x = 1; break;
+      case 4: ms_x = 1; ms_y = 1; break;
+      case 8: ms_x = 2; ms_y = 1; break;
+      }
+
+      PUSH_DATA(push, ms_x);
+      PUSH_DATA(push, ms_y);
+   }
+
 #ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
    BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
    PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
-- 
2.17.1



More information about the mesa-dev mailing list