[Mesa-dev] [PATCH 02/11] nvc0: bind driver cb for compute on c7[] for Kepler
Samuel Pitoiset
samuel.pitoiset at gmail.com
Sat Feb 27 14:01:58 UTC 2016
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 +-
src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 11 ++++++-
src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 10 +++---
src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 38 ++++++++++++++--------
src/gallium/drivers/nouveau/nvc0/nve4_compute.h | 25 --------------
5 files changed, 41 insertions(+), 45 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 8abdd93..d6dfed3 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1734,7 +1734,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
}
addr += prog->driver->prop.cp.gridInfoBase;
bld.mkLoad(TYPE_U32, i->getDef(0),
- bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL);
+ bld.mkSymbol(FILE_MEMORY_CONST, 7, TYPE_U32, addr), NULL);
break;
case SV_SAMPLE_INDEX:
// TODO: Properly pass source as an address in the PIX address space
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 203e479..dcb0bda 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -102,7 +102,7 @@
#define NVC0_CB_USR_INFO(s) (s << 16)
#define NVC0_CB_USR_SIZE (6 << 16)
/* 6 driver constbuts, at 1K each */
-#define NVC0_CB_AUX_INFO(s) NVC0_CB_USR_SIZE + (s << 10)
+#define NVC0_CB_AUX_INFO(s) NVC0_CB_USR_SIZE + (s << 12)
#define NVC0_CB_AUX_SIZE (6 << 10)
/* TIC/TSC entries (6 user clip planes, base instance id) */
#define NVC0_CB_AUX_TXC_INFO 0x000
@@ -113,14 +113,23 @@
/* 8 user clip planes, at 4 32-bits floats each */
#define NVC0_CB_AUX_UCP_INFO 0x100
#define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4)
+/* 8 sets of 32-bits integer pairs MS offsets */
+#define NVC0_CB_AUX_MS_INFO 0x100 /* CP */
+#define NVC0_CB_AUX_MS_SIZE (8 * 2 * 4)
/* 8 sets of 32-bits integer pairs sample offsets */
#define NVC0_CB_AUX_SAMPLE_INFO 0x180 /* FP */
#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2)
/* draw parameters (index bais, base instance, drawid) */
#define NVC0_CB_AUX_DRAW_INFO 0x180 /* VP */
+/* block/grid size, at 3 32-bits integers each and gridid */
+#define NVC0_CB_AUX_GRID_INFO 0x180 /* CP */
+#define NVC0_CB_AUX_GRID_SIZE (7 * 4)
/* 32 user buffers, at 4 32-bits integers each */
#define NVC0_CB_AUX_BUF_INFO(i) 0x200 + (i) * 4 * 4
#define NVC0_CB_AUX_BUF_SIZE (NVC0_MAX_BUFFERS * 4 * 4)
+/* 32 surfaces, at 16 32-bits integers each */
+#define NVC0_CB_AUX_SUF_INFO(i) 0x400 + (i) * 16 * 4
+#define NVC0_CB_AUX_SUF_SIZE (32 * 16 * 4)
/* 4 32-bits floats for the vertex runout, put at the end */
#define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + NVC0_CB_AUX_SIZE
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index d01de73..8f1f942 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -540,10 +540,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
- info->io.resInfoCBSlot = 0;
- info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
- info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
- info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
+ info->io.resInfoCBSlot = 7;
+ info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
+ info->io.suInfoBase = NVC0_CB_AUX_SUF_INFO(0);
+ info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
info->io.bufInfoBase = 0; /* TODO */
} else {
info->io.resInfoCBSlot = 15;
@@ -551,7 +551,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->io.suInfoBase = 0; /* TODO */
}
info->io.msInfoCBSlot = 0;
- info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
+ info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
} else {
if (chipset >= NVISA_GK104_CHIPSET) {
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 4a4e836..b2059bb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -41,6 +41,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
int i;
int ret;
uint32_t obj_class;
+ uint32_t address;
switch (dev->chipset & ~0xf) {
case 0x100:
@@ -65,7 +66,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
return ret;
}
- ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, NVE4_CP_PARAM_SIZE, NULL,
+ ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,
&screen->parm);
if (ret)
return ret;
@@ -128,15 +129,17 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
}
BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);
- PUSH_DATA (push, 0); /* does not interefere with 3D */
+ PUSH_DATA (push, 7); /* does not interfere with 3D */
if (obj_class == NVF0_COMPUTE_CLASS)
IMMED_NVC0(push, SUBC_CP(0x02c4), 1);
+ address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
+
/* MS sample coordinate offsets: these do not work with _ALT modes ! */
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
- PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
+ PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO);
+ PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO);
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 64);
PUSH_DATA (push, 1);
@@ -159,7 +162,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATA (push, 3); /* 7 */
PUSH_DATA (push, 1);
-#ifdef DEBUG
+#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
@@ -194,6 +197,9 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
uint32_t mask;
unsigned i;
const unsigned t = 1;
+ uint32_t address;
+
+ address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
mask = nvc0->surfaces_dirty[t];
while (mask) {
@@ -205,8 +211,8 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
* directly instead of via binding points, so we have to supply them.
*/
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
- PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
+ PUSH_DATAh(push, address + NVC0_CB_AUX_SUF_INFO(i));
+ PUSH_DATA (push, address + NVC0_CB_AUX_SUF_INFO(i));
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 64);
PUSH_DATA (push, 1);
@@ -271,6 +277,7 @@ static void
nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
uint64_t address;
const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
unsigned i, n;
@@ -282,11 +289,11 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
n = util_logbase2(dirty) + 1 - i;
assert(n);
- address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
+ address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, address);
- PUSH_DATA (push, address);
+ PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i));
+ PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i));
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, n * 4);
PUSH_DATA (push, 0x1);
@@ -337,6 +344,9 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *cp = nvc0->compprog;
+ uint32_t address;
+
+ address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
if (cp->parm_size) {
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
@@ -350,8 +360,8 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
PUSH_DATAp(push, input, cp->parm_size / 4);
}
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
- PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
+ PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);
+ PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO);
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 7 * 4);
PUSH_DATA (push, 0x1);
@@ -412,6 +422,8 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);
}
nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
+ nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
+ NVC0_CB_AUX_INFO(5), 1 << 10);
}
static inline struct nve4_cp_launch_desc *
@@ -498,7 +510,7 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
const unsigned s = 5;
unsigned i;
- uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
+ uint32_t commands[2][32];
unsigned n[2] = { 0, 0 };
for (i = 0; i < nvc0->num_textures[s]; ++i) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
index 84f8593..dcafbed 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
@@ -4,31 +4,6 @@
#include "nvc0/nve4_compute.xml.h"
-/* Input space is implemented as c0[], to which we bind the screen->parm bo.
- */
-#define NVE4_CP_INPUT_USER 0x0000
-#define NVE4_CP_INPUT_USER_LIMIT 0x1000
-#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4)
-#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4)
-#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4)
-#define NVE4_CP_INPUT_GRIDID 0x1018
-#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4)
-#define NVE4_CP_INPUT_TEX_STRIDE 4
-#define NVE4_CP_INPUT_TEX_MAX 32
-#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0
-#define NVE4_CP_INPUT_SUF_STRIDE 64
-#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
-#define NVE4_CP_INPUT_SUF_MAX 32
-#define NVE4_CP_INPUT_TRAP_INFO_PTR 0x1900
-#define NVE4_CP_INPUT_TEMP_PTR 0x1908
-#define NVE4_CP_INPUT_MP_TEMP_SIZE 0x1910
-#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914
-#define NVE4_CP_INPUT_CSTACK_SIZE 0x1918
-#define NVE4_CP_INPUT_SIZE 0x1a00
-#define NVE4_CP_PARAM_TRAP_INFO 0x2000
-#define NVE4_CP_PARAM_TRAP_INFO_SZ (1 << 16)
-#define NVE4_CP_PARAM_SIZE (NVE4_CP_PARAM_TRAP_INFO + (1 << 16))
-
struct nve4_cp_launch_desc
{
u32 unk0[8];
--
2.7.1
More information about the mesa-dev
mailing list