[Mesa-dev] [PATCH] nvc0: fix bindless multisampled images on Maxwell+
Rhys Perry
pendingchaos02 at gmail.com
Fri Jul 20 14:56:21 UTC 2018
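NVC0_CB_AUX_BINDLESS_INFO isn't written to on Maxwell+ and it's too small
anyway: it has 512 entries, while Maxwell+ image handles carry a TIC id
and can therefore index up to 2048 entries.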
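This adds a new array, NVC0_CB_AUX_SU_MS_INFO, which aliases it (both
start at the same offset in the aux constant buffer) and provides the
needed information: the ms_x/ms_y values of each image, indexed by TIC id.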
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
.../drivers/nouveau/codegen/nv50_ir_driver.h | 1 +
.../nouveau/codegen/nv50_ir_lowering_gm107.cpp | 4 +-
.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 49 +++++++++++++++++++++-
.../nouveau/codegen/nv50_ir_lowering_nvc0.h | 32 +-------------
src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 5 ++-
src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 +
src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 28 +++++++++++++
src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 26 +++++++++++-
8 files changed, 110 insertions(+), 37 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 7c835ceab8..0045ef729d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -188,6 +188,7 @@ struct nv50_ir_prog_info
uint8_t msInfoCBSlot; /* cX[] used for multisample info */
uint16_t msInfoBase; /* base address for multisample info */
uint16_t uboInfoBase; /* base address for compute UBOs (gk104+) */
+ uint16_t suMsInfoBase; /* base address for surface multisample info (gm107+) */
} io;
/* driver callback to assign input/output locations */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index c7436e2e29..d8eeaf4d09 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq)
if (mask & 0x1)
bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless));
+ loadSuMsInfo32(handle, 0));
if (mask & 0x2) {
int d = util_bitcount(mask & 0x1);
bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless));
+ loadSuMsInfo32(handle, 1));
}
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 1410cf26c8..e782b5bef9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1712,6 +1712,35 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
}
+/* On nvc0, surface info is obtained via the surface binding points passed
+ * to the SULD/SUST instructions.
+ * On nve4, surface info is stored in c[] and is used by various special
+ * instructions, e.g. for clamping coordinates or generating an address.
+ * They couldn't just have added an equivalent to TIC now, couldn't they ?
+ */
+#define NVC0_SU_INFO_ADDR 0x00
+#define NVC0_SU_INFO_FMT 0x04
+#define NVC0_SU_INFO_DIM_X 0x08
+#define NVC0_SU_INFO_PITCH 0x0c
+#define NVC0_SU_INFO_DIM_Y 0x10
+#define NVC0_SU_INFO_ARRAY 0x14
+#define NVC0_SU_INFO_DIM_Z 0x18
+#define NVC0_SU_INFO_UNK1C 0x1c
+#define NVC0_SU_INFO_WIDTH 0x20
+#define NVC0_SU_INFO_HEIGHT 0x24
+#define NVC0_SU_INFO_DEPTH 0x28
+#define NVC0_SU_INFO_TARGET 0x2c
+#define NVC0_SU_INFO_BSIZE 0x30
+#define NVC0_SU_INFO_RAW_X 0x34
+#define NVC0_SU_INFO_MS_X 0x38
+#define NVC0_SU_INFO_MS_Y 0x3c
+
+#define NVC0_SU_INFO__STRIDE 0x40
+
+#define NVC0_SU_INFO_DIM(i) (0x08 + (i) * 8)
+#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
+#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4)
+
inline Value *
NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless)
{
@@ -1732,6 +1761,15 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
prog->driver->io.suInfoBase);
}
+inline Value *
+NVC0LoweringPass::loadSuMsInfo32(Value *handle, uint32_t index)
+{
+ Value *ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), handle, bld.mkImm(2047));
+ ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(3));
+
+ return loadResInfo32(ptr, index * 4, prog->driver->io.suMsInfoBase);
+}
+
static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
{
switch (su->tex.target.getEnum()) {
@@ -1817,8 +1855,15 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
Value *ind = tex->getIndirectR();
- Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
- Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
+ Value *ms_x, *ms_y;
+ if (targ->getChipset() >= NVISA_GM107_CHIPSET) {
+ Value *handle = tex->tex.bindless ? ind : loadTexHandle(ind, slot + 32);
+ ms_x = loadSuMsInfo32(handle, 0);
+ ms_y = loadSuMsInfo32(handle, 1);
+ } else {
+ ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
+ ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
+ }
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 8724c09afd..5b3918d906 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -23,35 +23,6 @@
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_build_util.h"
-/* On nvc0, surface info is obtained via the surface binding points passed
- * to the SULD/SUST instructions.
- * On nve4, surface info is stored in c[] and is used by various special
- * instructions, e.g. for clamping coordinates or generating an address.
- * They couldn't just have added an equivalent to TIC now, couldn't they ?
- */
-#define NVC0_SU_INFO_ADDR 0x00
-#define NVC0_SU_INFO_FMT 0x04
-#define NVC0_SU_INFO_DIM_X 0x08
-#define NVC0_SU_INFO_PITCH 0x0c
-#define NVC0_SU_INFO_DIM_Y 0x10
-#define NVC0_SU_INFO_ARRAY 0x14
-#define NVC0_SU_INFO_DIM_Z 0x18
-#define NVC0_SU_INFO_UNK1C 0x1c
-#define NVC0_SU_INFO_WIDTH 0x20
-#define NVC0_SU_INFO_HEIGHT 0x24
-#define NVC0_SU_INFO_DEPTH 0x28
-#define NVC0_SU_INFO_TARGET 0x2c
-#define NVC0_SU_INFO_BSIZE 0x30
-#define NVC0_SU_INFO_RAW_X 0x34
-#define NVC0_SU_INFO_MS_X 0x38
-#define NVC0_SU_INFO_MS_Y 0x3c
-
-#define NVC0_SU_INFO__STRIDE 0x40
-
-#define NVC0_SU_INFO_DIM(i) (0x08 + (i) * 8)
-#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
-#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4)
-
namespace nv50_ir {
class NVC0LegalizeSSA : public Pass
@@ -148,7 +119,7 @@ protected:
void handlePIXLD(Instruction *);
void checkPredicate(Instruction *);
- Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
+ Value *loadSuMsInfo32(Value *handle, uint32_t index);
virtual bool visit(Instruction *);
@@ -161,6 +132,7 @@ private:
Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
+ Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
Value *loadBufInfo64(Value *ptr, uint32_t off);
Value *loadBufLength32(Value *ptr, uint32_t off);
Value *loadUboInfo64(Value *ptr, uint32_t off);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 77237a3c0a..8aecfd8f6d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -149,9 +149,12 @@
/* 1 64-bits address and 1 32-bits sequence */
#define NVC0_CB_AUX_MP_INFO 0x6a0
#define NVC0_CB_AUX_MP_SIZE 3 * 4
-/* 512 64-byte blocks for bindless image handles */
+/* 512 64-byte blocks for bindless image handles (NVE4 only) */
#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4
#define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4)
+/* 2048 8-byte blocks for image multisampling info (GM107+) */
+#define NVC0_CB_AUX_SU_MS_INFO(i) 0x6b0 + (i) * 2 * 4
+#define NVC0_CB_AUX_SU_MS_SIZE (NVC0_TIC_MAX_ENTRIES * 2 * 4)
/* 4 32-bits floats for the vertex runout, put at the end */
#define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 57d98753f4..10e9815ccc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -607,6 +607,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0);
}
+ if (info->target >= NVISA_GM107_CHIPSET)
+ info->io.suMsInfoBase = NVC0_CB_AUX_SU_MS_INFO(0);
if (prog->type == PIPE_SHADER_COMPUTE) {
if (info->target >= NVISA_GK104_CHIPSET) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index f40600e48a..4b4359c889 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -1258,6 +1258,17 @@ gm107_validate_surfaces(struct nvc0_context *nvc0,
BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
PUSH_DATA (push, tic->id);
+
+ /* upload multisampling info */
+ if (view->resource->target == PIPE_TEXTURE_2D ||
+ view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
+ struct nv50_miptree *mt = nv50_miptree(view->resource);
+
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 3);
+ PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id));
+ PUSH_DATA (push, mt->ms_x);
+ PUSH_DATA (push, mt->ms_y);
+ }
}
static inline void
@@ -1398,6 +1409,7 @@ gm107_create_image_handle(struct pipe_context *pipe,
struct pipe_sampler_view *sview =
gm107_create_texture_view_from_image(pipe, view);
struct nv50_tic_entry *tic = nv50_tic_entry(sview);
+ int s;
if (tic == NULL)
goto fail;
@@ -1415,6 +1427,22 @@ gm107_create_image_handle(struct pipe_context *pipe,
nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+ if (view->resource->target == PIPE_TEXTURE_2D ||
+ view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
+ struct nv50_miptree *mt = nv50_miptree(view->resource);
+
+ for (s = 0; s < 6; s++) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 3);
+ PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id));
+ PUSH_DATA (push, mt->ms_x);
+ PUSH_DATA (push, mt->ms_y);
+ }
+ }
+
return 0x100000000ULL | tic->id;
fail:
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 28460f8cbe..7d436d1980 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -251,6 +251,23 @@ gm107_compute_validate_surfaces(struct nvc0_context *nvc0,
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATA (push, tic->id);
+ /* upload multisampling info */
+ if (view->resource->target == PIPE_TEXTURE_2D ||
+ view->resource->target == PIPE_TEXTURE_2D_ARRAY) {
+ struct nv50_miptree *mt = nv50_miptree(view->resource);
+
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id));
+ PUSH_DATA (push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id));
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 8);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 3);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+ PUSH_DATA (push, mt->ms_x);
+ PUSH_DATA (push, mt->ms_y);
+ }
+
BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}
@@ -558,6 +575,7 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
{
const struct nvc0_screen *screen = nvc0->screen;
const struct nvc0_program *cp = nvc0->compprog;
+ int cb_size;
nve4_cp_launch_desc_init_default(desc);
@@ -586,8 +604,12 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
NVC0_CB_USR_INFO(5), 1 << 16);
}
+
+ cb_size = 1 << 12;
+ if (nvc0->screen->compute->oclass >= GM107_COMPUTE_CLASS)
+ cb_size = 1 << 15; // make room for NVC0_CB_AUX_SU_MS_INFO
nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
- NVC0_CB_AUX_INFO(5), 1 << 11);
+ NVC0_CB_AUX_INFO(5), cb_size);
}
static void
@@ -625,7 +647,7 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0,
NVC0_CB_USR_INFO(5), 1 << 16);
}
gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
- NVC0_CB_AUX_INFO(5), 1 << 11);
+ NVC0_CB_AUX_INFO(5), 1 << 15);
}
static inline void *
--
2.14.4
More information about the mesa-dev
mailing list