[Mesa-dev] [PATCH v2] nvc0: fix bindless multisampled images on Maxwell+
Rhys Perry
pendingchaos02 at gmail.com
Mon Sep 17 15:00:25 UTC 2018
NVC0_CB_AUX_BINDLESS_INFO isn't written to on Maxwell+ and it's too small
anyway.
With these changes, TXQ is used to determine the number of samples, and
the coordinate adjustment information is looked up in a small array in
the driver constant buffer.
v2: rework to use TXQ and a small array instead of a larger array with an
entry for each texture
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
.../drivers/nouveau/codegen/nv50_ir_driver.h | 1 +
.../codegen/nv50_ir_lowering_gm107.cpp | 4 +--
.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 31 +++++++++++++++++--
.../nouveau/codegen/nv50_ir_lowering_nvc0.h | 3 +-
.../nouveau/codegen/nv50_ir_peephole.cpp | 1 +
.../drivers/nouveau/nvc0/mme/com9097.mme | 8 ++---
.../drivers/nouveau/nvc0/mme/com9097.mme.h | 8 ++---
.../drivers/nouveau/nvc0/nvc0_context.h | 23 ++++++++------
.../drivers/nouveau/nvc0/nvc0_program.c | 1 +
.../drivers/nouveau/nvc0/nvc0_screen.c | 15 +++++++++
.../drivers/nouveau/nvc0/nve4_compute.c | 22 +++++++++++++
11 files changed, 94 insertions(+), 23 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 7c835ceab8..b3da6fc3cf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -188,6 +188,7 @@ struct nv50_ir_prog_info
uint8_t msInfoCBSlot; /* cX[] used for multisample info */
uint16_t msInfoBase; /* base address for multisample info */
uint16_t uboInfoBase; /* base address for compute UBOs (gk104+) */
+ uint16_t msAdjInfoBase; /* base address for MS coordinate adjustment info */
} io;
/* driver callback to assign input/output locations */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index c7436e2e29..49a5f3b01f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq)
if (mask & 0x1)
bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless));
+ loadMsAdjInfo32(suq->tex.target, 0, slot, ind, suq->tex.bindless));
if (mask & 0x2) {
int d = util_bitcount(mask & 0x1);
bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless));
+ loadMsAdjInfo32(suq->tex.target, 1, slot, ind, suq->tex.bindless));
}
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 176e0cf608..5db29ba799 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1732,6 +1732,33 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
prog->driver->io.suInfoBase);
}
+inline Value *
+NVC0LoweringPass::loadMsAdjInfo32(TexInstruction::Target target, uint32_t index, int slot, Value *ind, bool bindless)
+{
+ if (!bindless || targ->getChipset() < NVISA_GM107_CHIPSET)
+ return loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(index), bindless);
+
+ assert(bindless);
+
+ Value *samples = bld.getSSA();
+ // This shouldn't be lowered because it's being inserted before the current instruction
+ TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
+ tex->tex.target = target;
+ tex->tex.query = TXQ_TYPE;
+ tex->tex.mask = 0x4;
+ tex->tex.r = 0xff;
+ tex->tex.s = 0x1f;
+ tex->tex.rIndirectSrc = 0;
+ tex->setDef(0, samples);
+ tex->setSrc(0, ind);
+ tex->setSrc(1, bld.loadImm(NULL, 0));
+ bld.insert(tex);
+
+ // XMAD has a higher throughput than SHL and we shouldn't be dealing with >65535 integers here
+ Value *ptr = bld.mkOp3v(OP_XMAD, TYPE_U32, bld.getSSA(), samples, bld.mkImm(8), bld.mkImm(0));
+ return loadResInfo32(ptr, index * 4, prog->driver->io.msAdjInfoBase);
+}
+
static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
{
switch (su->tex.target.getEnum()) {
@@ -1817,8 +1844,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
Value *ind = tex->getIndirectR();
- Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
- Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
+ Value *ms_x = loadMsAdjInfo32(tex->tex.target, 0, slot, ind, tex->tex.bindless);
+ Value *ms_y = loadMsAdjInfo32(tex->tex.target, 1, slot, ind, tex->tex.bindless);
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 5dbb3e4f00..4136b1ecfe 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -148,7 +148,7 @@ protected:
void handlePIXLD(Instruction *);
void checkPredicate(Instruction *);
- Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
+ Value *loadMsAdjInfo32(TexInstruction::Target targ, uint32_t index, int slot, Value *ind, bool bindless);
virtual bool visit(Instruction *);
@@ -161,6 +161,7 @@ private:
Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
+ Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
Value *loadBufInfo64(Value *ptr, uint32_t off);
Value *loadBufLength32(Value *ptr, uint32_t off);
Value *loadUboInfo64(Value *ptr, uint32_t off);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index d851cf3c37..f91c502e9e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -317,6 +317,7 @@ IndirectPropagation::visit(BasicBlock *bb)
ImmediateValue imm;
if (!i->src(s).isIndirect(0))
continue;
+
insn = i->getIndirect(s, 0)->getInsn();
if (!insn)
continue;
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index 38c2e86843..8ca8f34f9b 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -255,7 +255,7 @@ dei_draw_again:
parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
parm $r4 send $r4 /* index_bias, send start */
maddr 0x18e3 /* CB_POS */
- send 0x1a0 /* 256 + 160 */
+ send 0x1e0 /* 256 + 224 */
braz $r2 #dei_end
parm $r5 send $r4 /* start_instance, send index_bias */
send $r5 /* send start_instance */
@@ -311,7 +311,7 @@ dai_draw_again:
braz $r3 #dai_end
parm $r4 send $r4 /* start_instance */
maddr 0x18e3 /* CB_POS */
- send 0x1a0 /* 256 + 160 */
+ send 0x1e0 /* 256 + 224 */
send 0x0 /* send 0 as base_vertex */
send $r4 /* send start_instance */
send $r6 /* draw id */
@@ -374,7 +374,7 @@ deic_draw_again:
parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
parm $r4 send $r4 /* index_bias, send start */
maddr 0x18e3 /* CB_POS */
- send 0x1a0 /* 256 + 160 */
+ send 0x1e0 /* 256 + 224 */
braz $r2 #deic_end
parm $r5 send $r4 /* start_instance, send index_bias */
send $r5 /* send start_instance */
@@ -455,7 +455,7 @@ daic_draw_again:
braz $r3 #daic_end
parm $r4 send $r4 /* start_instance */
maddr 0x18e3 /* CB_POS */
- send 0x1a0 /* 256 + 160 */
+ send 0x1e0 /* 256 + 224 */
send 0x0 /* send 0 as base_vertex */
send $r4 /* send start_instance */
send $r6 /* draw id */
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
index 49c0891114..47c5e6c6e0 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
@@ -140,7 +140,7 @@ uint32_t mme9097_draw_elts_indirect[] = {
0x017dc451,
0x00002431,
0x0638c021,
- 0x00680041,
+ 0x00780041,
0x0004d007,
0x00002531,
0x00002841,
@@ -185,7 +185,7 @@ uint32_t mme9097_draw_arrays_indirect[] = {
0x0004d807,
0x00002431,
0x0638c021,
- 0x00680041,
+ 0x00780041,
0x00000041,
0x00002041,
0x00003041,
@@ -233,7 +233,7 @@ uint32_t mme9097_draw_elts_indirect_count[] = {
0x017dc451,
0x00002431,
0x0638c021,
- 0x00680041,
+ 0x00780041,
0x0004d007,
0x00002531,
0x00002841,
@@ -300,7 +300,7 @@ uint32_t mme9097_draw_arrays_indirect_count[] = {
0x0004d807,
0x00002431,
0x0638c021,
- 0x00680041,
+ 0x00780041,
0x00000041,
0x00002041,
0x00003041,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 77237a3c0a..1d920c26f5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -122,35 +122,38 @@
/* 8 sets of 32-bits coordinate offsets */
#define NVC0_CB_AUX_MS_INFO 0x0c0
#define NVC0_CB_AUX_MS_SIZE (8 * 2 * 4)
+/* 8 sets of 32-bit pairs containing coordinate adjustment information */
+#define NVC0_CB_AUX_MS_ADJ_INFO(i) 0x100 + (i) * 4 * 2
+#define NVC0_CB_AUX_MS_ADJ_SIZE (8 * 2 * 4)
/* block/grid size, at 3 32-bits integers each, gridid and work_dim */
-#define NVC0_CB_AUX_GRID_INFO(i) 0x100 + (i) * 4 /* CP */
+#define NVC0_CB_AUX_GRID_INFO(i) 0x140 + (i) * 4 /* CP */
#define NVC0_CB_AUX_GRID_SIZE (8 * 4)
/* FB texture handle */
-#define NVC0_CB_AUX_FB_TEX_INFO 0x100 /* FP */
+#define NVC0_CB_AUX_FB_TEX_INFO 0x140 /* FP */
#define NVC0_CB_AUX_FB_TEX_SIZE (4)
/* 8 user clip planes, at 4 32-bits floats each */
-#define NVC0_CB_AUX_UCP_INFO 0x120
+#define NVC0_CB_AUX_UCP_INFO 0x160
#define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4)
/* 13 ubos, at 4 32-bits integer each */
-#define NVC0_CB_AUX_UBO_INFO(i) 0x120 + (i) * 4 * 4 /* CP */
+#define NVC0_CB_AUX_UBO_INFO(i) 0x160 + (i) * 4 * 4 /* CP */
#define NVC0_CB_AUX_UBO_SIZE ((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4)
/* 8 sets of 32-bits integer pairs sample offsets */
-#define NVC0_CB_AUX_SAMPLE_INFO 0x1a0 /* FP */
+#define NVC0_CB_AUX_SAMPLE_INFO 0x1e0 /* FP */
/* 256 bytes, though only 64 bytes used before GM200 */
#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 2 * 4 * 4)
/* draw parameters (index bais, base instance, drawid) */
-#define NVC0_CB_AUX_DRAW_INFO 0x1a0 /* VP */
+#define NVC0_CB_AUX_DRAW_INFO 0x1e0 /* VP */
/* 32 user buffers, at 4 32-bits integers each */
-#define NVC0_CB_AUX_BUF_INFO(i) 0x2a0 + (i) * 4 * 4
+#define NVC0_CB_AUX_BUF_INFO(i) 0x2e0 + (i) * 4 * 4
#define NVC0_CB_AUX_BUF_SIZE (NVC0_MAX_BUFFERS * 4 * 4)
/* 8 surfaces, at 16 32-bits integers each */
-#define NVC0_CB_AUX_SU_INFO(i) 0x4a0 + (i) * 16 * 4
+#define NVC0_CB_AUX_SU_INFO(i) 0x4e0 + (i) * 16 * 4
#define NVC0_CB_AUX_SU_SIZE (NVC0_MAX_IMAGES * 16 * 4)
/* 1 64-bits address and 1 32-bits sequence */
-#define NVC0_CB_AUX_MP_INFO 0x6a0
+#define NVC0_CB_AUX_MP_INFO 0x6e0
#define NVC0_CB_AUX_MP_SIZE 3 * 4
/* 512 64-byte blocks for bindless image handles */
-#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4
+#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6f0 + (i) * 16 * 4
#define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4)
/* 4 32-bits floats for the vertex runout, put at the end */
#define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 57d98753f4..b3a0954d76 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -600,6 +600,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->io.ucpBase = NVC0_CB_AUX_UCP_INFO;
info->io.drawInfoBase = NVC0_CB_AUX_DRAW_INFO;
info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
+ info->io.msAdjInfoBase = NVC0_CB_AUX_MS_ADJ_INFO(0);
info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
if (info->target >= NVISA_GK104_CHIPSET) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 2eecf59ce0..f67e42052e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -1362,6 +1362,21 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 1);
PUSH_DATA (push, 3); /* 7 */
PUSH_DATA (push, 1);
+
+ /* MS coordinate adjustment information */
+ for (int i = 1; i <= 8; i *= 2) {
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 3);
+ PUSH_DATA (push, NVC0_CB_AUX_MS_ADJ_INFO(i));
+ int ms_x = 0, ms_y = 0;
+ switch (i) {
+ case 1: break;
+ case 2: ms_x = 1; break;
+ case 4: ms_x = 1; ms_y = 1; break;
+ case 8: ms_x = 2; ms_y = 1; break;
+ }
+ PUSH_DATA(push, ms_x);
+ PUSH_DATA(push, ms_y);
+ }
}
BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
PUSH_DATA (push, 0);
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 8aa8d4936f..b7af7ab0d2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -168,6 +168,28 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATA (push, 3); /* 7 */
PUSH_DATA (push, 1);
+ /* MS coordinate adjustment information */
+ for (int i = 1; i <= 8; i *= 2) {
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 4);
+ PUSH_DATA (push, 8);
+ PUSH_DATA (push, 1);
+ PUSH_DATAh(push, address + NVC0_CB_AUX_MS_ADJ_INFO(i));
+ PUSH_DATA (push, address + NVC0_CB_AUX_MS_ADJ_INFO(i));
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 3);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+
+ int ms_x = 0, ms_y = 0;
+ switch (i) {
+ case 1: break;
+ case 2: ms_x = 1; break;
+ case 4: ms_x = 1; ms_y = 1; break;
+ case 8: ms_x = 2; ms_y = 1; break;
+ }
+
+ PUSH_DATA(push, ms_x);
+ PUSH_DATA(push, ms_y);
+ }
+
#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
--
2.17.1
More information about the mesa-dev
mailing list