[Mesa-dev] [PATCH v2 06/11] nvc0: add support for indirect compute on Fermi
Samuel Pitoiset
samuel.pitoiset at gmail.com
Sun Feb 7 20:49:57 UTC 2016
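When indirect compute is used, the size of the grid (in blocks) is
stored as three integers inside a buffer. This requires a macro to
set up GRIDDIM_YX and GRIDDIM_Z from those values and then launch the
grid; the macro skips the launch when any of the three dimensions is
zero.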
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
src/gallium/drivers/nouveau/nvc0/mme/Makefile | 2 +-
src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme | 24 ++++++++++
src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h | 19 ++++++++
src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 52 ++++++++++++++--------
src/gallium/drivers/nouveau/nvc0/nvc0_macros.h | 2 +
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +
6 files changed, 81 insertions(+), 20 deletions(-)
create mode 100644 src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
create mode 100644 src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/Makefile b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
index 1c0f583..52fb0a5 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/Makefile
+++ b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
@@ -1,5 +1,5 @@
ENVYAS?=envyas
-TARGETS=com9097.mme.h
+TARGETS=com9097.mme.h com90c0.mme.h
all: $(TARGETS)
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
new file mode 100644
index 0000000..a3f1bde
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
@@ -0,0 +1,24 @@
+/* NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT
+ *
+ * arg = num_groups_x
+ * parm[0] = num_groups_y
+ * parm[1] = num_groups_z
+ */
+.section #mme90c0_launch_grid_indirect
+ parm $r2 maddr 0x108e /* GRIDDIM_YX */
+ braz $r1 #fail
+ parm $r3
+ braz annul $r2 #fail
+ braz annul $r3 #fail
+ send (extrinsrt $r1 $r2 0x0 0x10 0x10) /* num_groups_y << 16 | num_groups_x */
+ send $r3
+ maddrsend 0xa7 /* COMPUTE_BEGIN */
+ maddrsend 0x282 /* UNKA08 */
+ maddr 0xda /* LAUNCH */
+ send 0x1000
+ maddrsend 0x281 /* COMPUTE_END */
+ exit maddr 0xd8 /* UNK360 */
+ send 0x1
+fail:
+ exit
+ nop
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
new file mode 100644
index 0000000..1dc06e5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
@@ -0,0 +1,19 @@
+uint32_t mme90c0_launch_grid_indirect[] = {
+ 0x04238251,
+ 0x00034807,
+ 0x00000301,
+/* 0x000e: fail */
+ 0x0002d027,
+ 0x00029827,
+ 0x84008842,
+ 0x00001841,
+ 0x0029c071,
+ 0x00a08071,
+ 0x00368021,
+ 0x04000041,
+ 0x00a04071,
+ 0x003600a1,
+ 0x00004041,
+ 0x00000091,
+ 0x00000011,
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 0a4efc0..bcd1c7c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -348,14 +348,6 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
PUSH_DATA (push, cp->num_gprs);
- /* grid/block setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
- PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
- PUSH_DATA (push, info->grid[2]);
- BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
- PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
- PUSH_DATA (push, info->block[2]);
-
/* launch preliminary setup */
BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
PUSH_DATA (push, 0x1);
@@ -364,17 +356,39 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
- /* kernel launching */
- BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
- PUSH_DATA (push, 0x1000);
- BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
- PUSH_DATA (push, 0x1);
+ /* block setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
+ PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
+ PUSH_DATA (push, info->block[2]);
+
+ if (unlikely(info->indirect)) {
+ struct nv04_resource *res = nv04_resource(info->indirect);
+ uint32_t offset = res->offset + info->indirect_offset;
+ unsigned macro = NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT;
+
+ nouveau_pushbuf_space(push, 16, 0, 1);
+ PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
+ PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
+ nouveau_pushbuf_data(push, res->bo, offset,
+ NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
+ } else {
+ /* grid setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
+ PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
+ PUSH_DATA (push, info->grid[2]);
+
+ /* kernel launching */
+ BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
+ PUSH_DATA (push, 0x1000);
+ BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
+ PUSH_DATA (push, 0x1);
+ }
/* rebind all the 3D constant buffers
* (looks like binding a CB on COMPUTE clobbers 3D state) */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
index 49e176c..57262fe 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
@@ -35,4 +35,6 @@
#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858
+#define NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT 0x00003860
+
#endif /* __NVC0_MACROS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 84e4253..85be1cc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -36,6 +36,7 @@
#include "nvc0/nvc0_screen.h"
#include "nvc0/mme/com9097.mme.h"
+#include "nvc0/mme/com90c0.mme.h"
static boolean
nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
@@ -1053,6 +1054,7 @@ nvc0_screen_create(struct nouveau_device *dev)
MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
+ MK_MACRO(NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, 1);
--
2.6.4
More information about the mesa-dev mailing list