[Mesa-dev] [PATCH v2 4/4] nouveau: Add support for SV_WORK_DIM
Hans de Goede
hdegoede at redhat.com
Wed Jun 29 12:37:56 UTC 2016
Add support for SV_WORK_DIM for nvc0 and nve4.
Signed-off-by: Hans de Goede <hdegoede at redhat.com>
---
Changes in v2
-Use new NVC0_CB_AUX_GRID_INFO(i) version
Changes in v1 (first non RFC posting):
-Adjust NVC0_CB_AUX_GRID_SIZE for the extra value in grid-info
-Use NVC0_CB_AUX_GRID_SIZE instead of a hardcoded value when
uploading the grid info
-Also implement SV_WORK_DIM for nvc0
---
src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 +
.../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 1 +
.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 ++
.../nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 +
src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 24 ++++++++++++++++------
src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 4 ++--
src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 +-
src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 7 ++++---
8 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 94e54bb..41804b6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -461,6 +461,7 @@ enum SVSemantic
SV_BASEVERTEX,
SV_BASEINSTANCE,
SV_DRAWID,
+ SV_WORK_DIM,
SV_UNDEFINED,
SV_LAST
};
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index ed3249e..7695511 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -408,6 +408,7 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
+ case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM;
default:
assert(0);
return nv50_ir::SV_CLOCK;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 67bd73b..e9c3f27 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2372,6 +2372,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1));
return true;
}
+ // Fallthrough
+ case SV_WORK_DIM:
addr += prog->driver->prop.cp.gridInfoBase;
bld.mkLoad(TYPE_U32, i->getDef(0),
bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 932ec39..04ac288 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -295,6 +295,7 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0;
case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0;
case SV_GRIDID: return kepler ? 0x18 : ~0;
+ case SV_WORK_DIM: return 0x1c;
case SV_SAMPLE_INDEX: return 0;
case SV_SAMPLE_POS: return 0;
case SV_SAMPLE_MASK: return 0;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 59bbe1e..887fdf2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -356,7 +356,8 @@ nvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
}
static void
-nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
+nvc0_compute_upload_input(struct nvc0_context *nvc0,
+ const struct pipe_grid_info *info)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_screen *screen = nvc0->screen;
@@ -375,13 +376,24 @@ nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
/* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
PUSH_DATA (push, 0);
- PUSH_DATAp(push, input, cp->parm_size / 4);
+ PUSH_DATAp(push, info->input, cp->parm_size / 4);
nvc0_compute_invalidate_constbufs(nvc0);
-
- BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
- PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
}
+
+ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
+ PUSH_DATA (push, 2048);
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
+ BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
+ PUSH_DATA (push, (15 << 8) | 1);
+ BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1);
+ /* (7) as we only upload work_dim on nvc0, the rest uses special regs */
+ PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7));
+ PUSH_DATA (push, info->work_dim);
+
+ BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
+ PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
}
void
@@ -398,7 +410,7 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
return;
}
- nvc0_compute_upload_input(nvc0, info->input);
+ nvc0_compute_upload_input(nvc0, info);
BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 4868a64..912278d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -114,9 +114,9 @@
/* 8 sets of 32-bits coordinate offsets */
#define NVC0_CB_AUX_MS_INFO 0x0a0 /* CP */
#define NVC0_CB_AUX_MS_SIZE (8 * 2 * 4)
-/* block/grid size, at 3 32-bits integers each and gridid */
+/* block/grid size, at 3 32-bits integers each, gridid and work_dim */
#define NVC0_CB_AUX_GRID_INFO(i) 0x0e0 + (i) * 4 /* CP */
-#define NVC0_CB_AUX_GRID_SIZE (7 * 4)
+#define NVC0_CB_AUX_GRID_SIZE (8 * 4)
/* 8 user clip planes, at 4 32-bits floats each */
#define NVC0_CB_AUX_UCP_INFO 0x100
#define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index f151d51..d49614f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -562,13 +562,13 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
if (chipset >= NVISA_GK104_CHIPSET) {
info->io.auxCBSlot = 7;
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
- info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO(0);
info->io.uboInfoBase = NVC0_CB_AUX_UBO_INFO(0);
}
info->io.msInfoCBSlot = 0;
info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
+ info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO(0);
} else {
if (chipset >= NVISA_GK104_CHIPSET) {
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 5fddd92..d1cf59a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -434,7 +434,7 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO(0));
PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO(0));
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
- PUSH_DATA (push, 7 * 4);
+ PUSH_DATA (push, NVC0_CB_AUX_GRID_SIZE);
PUSH_DATA (push, 0x1);
if (unlikely(info->indirect)) {
@@ -444,18 +444,19 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
nouveau_pushbuf_space(push, 16, 0, 1);
PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
- BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, info->block, 3);
nouveau_pushbuf_data(push, res->bo, offset,
NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
} else {
- BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, info->block, 3);
PUSH_DATAp(push, info->grid, 3);
}
PUSH_DATA (push, 0);
+ PUSH_DATA (push, info->work_dim);
BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
--
2.7.4
More information about the mesa-dev
mailing list