[Mesa-dev] [PATCH 2/3] nvc0: add support for indirect drawing
Ilia Mirkin
imirkin at alum.mit.edu
Wed Jul 9 20:59:04 PDT 2014
From: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Reviewed-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
docs/GL3.txt | 5 +-
docs/relnotes/10.3.html | 1 +
src/gallium/drivers/nouveau/nouveau_screen.c | 6 +-
src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h | 2 +-
src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +
src/gallium/drivers/nouveau/nvc0/mme/com9097.mme | 75 ++++++++++++++++++
src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h | 56 ++++++++++++++
src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h | 6 ++
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 13 +++-
src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 90 ++++++++++++++++------
10 files changed, 223 insertions(+), 33 deletions(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 296e14c..8f41df0 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -98,7 +98,7 @@ GL 4.0:
GLSL 4.0 not started
GL_ARB_draw_buffers_blend DONE (i965, nv50, nvc0, r600, radeonsi, softpipe)
- GL_ARB_draw_indirect DONE (i965)
+ GL_ARB_draw_indirect DONE (i965, nvc0)
GL_ARB_gpu_shader5 started
- 'precise' qualifier DONE
- Dynamically uniform sampler array indices started (Chris)
@@ -123,7 +123,6 @@ GL 4.0:
GL_ARB_transform_feedback2 DONE (i965, nv50, nvc0, r600, radeonsi)
GL_ARB_transform_feedback3 DONE (i965, nv50, nvc0, r600, radeonsi)
-
GL 4.1:
GLSL 4.1 not started
@@ -165,7 +164,7 @@ GL 4.3:
GL_ARB_framebuffer_no_attachments not started
GL_ARB_internalformat_query2 not started
GL_ARB_invalidate_subdata DONE (all drivers)
- GL_ARB_multi_draw_indirect DONE (i965)
+ GL_ARB_multi_draw_indirect DONE (i965, nvc0)
GL_ARB_program_interface_query not started
GL_ARB_robust_buffer_access_behavior not started
GL_ARB_shader_image_size not started
diff --git a/docs/relnotes/10.3.html b/docs/relnotes/10.3.html
index 2e718fc..7d4f533 100644
--- a/docs/relnotes/10.3.html
+++ b/docs/relnotes/10.3.html
@@ -56,6 +56,7 @@ Note: some of the new features are only available with certain drivers.
<li>GL_ARB_seamless_cubemap_per_texture on i965, llvmpipe, nvc0, r600, radeonsi, softpipe</li>
<li>GL_ARB_fragment_layer_viewport on nv50, nvc0, llvmpipe, r600</li>
<li>GL_AMD_vertex_shader_viewport_index on i965/gen7+, r600</li>
+<li>GL_ARB_(multi_)draw_indirect on nvc0</li>
</ul>
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 9d71bf7..517978d 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -194,12 +194,14 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */
screen->vidmem_bindings =
PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL |
- PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_CURSOR |
+ PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
+ PIPE_BIND_CURSOR |
PIPE_BIND_SAMPLER_VIEW |
PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE |
PIPE_BIND_GLOBAL;
screen->sysmem_bindings =
- PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT;
+ PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
+ PIPE_BIND_COMMAND_ARGS_BUFFER;
memset(&mm_config, 0, sizeof(mm_config));
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h
index cc3a382..7523fdc 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h
@@ -479,7 +479,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_3D_WATCHDOG_TIMER 0x00000de4
-#define NV50_3D_UNK0DE8 0x00000de8
+#define NV50_3D_PRIM_RESTART_WITH_DRAW_ARRAYS 0x00000de8
#define NV50_3D_UNK0DEC 0x00000dec
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 17d8fa3..fd63819 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -437,6 +437,8 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1);
+ PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index da9975b..07e4519 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -223,3 +223,78 @@ locn_0a_ts:
locn_0f_ts:
exit maddr 0xbb
send $r6
+
+/* NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT
+ *
+ * NOTE: Saves and restores VB_ELEMENT,INSTANCE_BASE.
+ *
+ * arg = mode
+ * parm[0] = count
+ * parm[1] = instance_count
+ * parm[2] = start
+ * parm[3] = index_bias
+ * parm[4] = start_instance
+ */
+.section #mme9097_draw_elts_indirect
+ parm $r3 /* count */
+ parm $r2 /* instance_count */
+ parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
+ parm $r4 send $r4 /* index_bias, send start */
+ braz $r2 #dei_end
+ parm $r5 /* start_instance */
+ read $r6 0x50d /* VB_ELEMENT_BASE */
+ read $r7 0x50e /* VB_INSTANCE_BASE */
+ maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
+ send $r4
+ send $r5
+ mov $r4 0x1
+dei_again:
+ maddr 0x586 /* VERTEX_BEGIN_GL */
+ send $r1 /* mode */
+ maddr 0x5f8 /* INDEX_BATCH_COUNT */
+ send $r3 /* count */
+ mov $r2 (sub $r2 $r4)
+ maddrsend 0x585 /* VERTEX_END_GL */
+ branz $r2 #dei_again
+ mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
+ maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
+ exit send $r6
+ send $r7
+dei_end:
+ exit
+ nop
+
+/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT:
+ *
+ * NOTE: Saves and restores VB_INSTANCE_BASE.
+ *
+ * arg = mode
+ * parm[0] = count
+ * parm[1] = instance_count
+ * parm[2] = start
+ * parm[3] = start_instance
+ */
+.section #mme9097_draw_arrays_indirect
+ parm $r2 /* count */
+ parm $r3 /* instance_count */
+ parm $r4 maddr 0x35d /* VERTEX_BUFFER_FIRST, start */
+ parm $r4 send $r4 /* start_instance */
+ braz $r3 #dai_end
+ read $r6 0x50e /* VB_INSTANCE_BASE */
+ maddr 0x50e /* VB_INSTANCE_BASE */
+ mov $r5 0x1
+ send $r4
+dai_again:
+ maddr 0x586 /* VERTEX_BEGIN_GL */
+ send $r1 /* mode */
+ maddr 0x35e /* VERTEX_BUFFER_COUNT */
+ send $r2
+ mov $r3 (sub $r3 $r5)
+ maddrsend 0x585 /* VERTEX_END_GL */
+ branz $r3 #dai_again
+ mov $r1 (extrinsrt $r1 $r5 0 1 26) /* set INSTANCE_NEXT */
+ exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
+ send $r6
+dai_end:
+ exit
+ nop
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
index fd81a2f..654bf93 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
@@ -123,3 +123,59 @@ uint32_t mme9097_tep_select[] = {
0x002ec0a1,
0x00003041,
};
+
+uint32_t mme9097_draw_elts_indirect[] = {
+ 0x00000301,
+ 0x00000201,
+ 0x017dc451,
+/* 0x000c: dei_again */
+ 0x00002431,
+ 0x0004d007,
+/* 0x0017: dei_end */
+ 0x00000501,
+ 0x01434615,
+ 0x01438715,
+ 0x05434021,
+ 0x00002041,
+ 0x00002841,
+ 0x00004411,
+ 0x01618021,
+ 0x00000841,
+ 0x017e0021,
+ 0x00001841,
+ 0x00051210,
+ 0x01614071,
+ 0xfffe9017,
+ 0xd0410912,
+ 0x05434021,
+ 0x000030c1,
+ 0x00003841,
+ 0x00000091,
+ 0x00000011,
+};
+
+uint32_t mme9097_draw_arrays_indirect[] = {
+ 0x00000201,
+ 0x00000301,
+/* 0x0009: dai_again */
+ 0x00d74451,
+ 0x00002431,
+/* 0x0013: dai_end */
+ 0x0003d807,
+ 0x01438615,
+ 0x01438021,
+ 0x00004511,
+ 0x00002041,
+ 0x01618021,
+ 0x00000841,
+ 0x00d78021,
+ 0x00001041,
+ 0x00055b10,
+ 0x01614071,
+ 0xfffe9817,
+ 0xd0414912,
+ 0x014380a1,
+ 0x00003041,
+ 0x00000091,
+ 0x00000011,
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
index 9e3c56b..94b447b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
@@ -338,6 +338,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_WATCHDOG_TIMER 0x00000de4
+#define NVC0_3D_PRIM_RESTART_WITH_DRAW_ARRAYS 0x00000de8
+
#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8
#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc
@@ -1347,5 +1349,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_MACRO_TEP_SELECT 0x00003830
+#define NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT 0x00003838
+
+#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT 0x00003840
+
#endif /* NVC0_3D_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index dc9b143..3f444a4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -158,6 +158,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_USER_CONSTANT_BUFFERS:
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_USER_VERTEX_BUFFERS:
@@ -183,7 +184,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
- case PIPE_CAP_DRAW_INDIRECT:
return 0;
}
@@ -405,6 +405,8 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
size /= 4;
+ assert((pos + size) <= 0x800);
+
BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2);
PUSH_DATA (push, (m - 0x3800) / 8);
PUSH_DATA (push, pos);
@@ -433,8 +435,6 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
PUSH_DATA (push, (3 << 16) | 3);
BEGIN_NVC0(push, SUBC_3D(0x1794), 1);
PUSH_DATA (push, (2 << 16) | 2);
- BEGIN_NVC0(push, SUBC_3D(0x0de8), 1);
- PUSH_DATA (push, 1);
if (obj_class < GM107_3D_CLASS) {
BEGIN_NVC0(push, SUBC_3D(0x12ac), 1);
@@ -609,7 +609,8 @@ nvc0_screen_create(struct nouveau_device *dev)
push->rsvd_kick = 5;
screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
+ PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_COMMAND_ARGS_BUFFER;
screen->base.sysmem_bindings |=
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
@@ -768,6 +769,8 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 1);
BEGIN_NVC0(push, NVC0_3D(LINE_LAST_PIXEL), 1);
PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1);
+ PUSH_DATA (push, 1);
BEGIN_NVC0(push, NVC0_3D(BLEND_SEPARATE_ALPHA), 1);
PUSH_DATA (push, 1);
BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
@@ -954,6 +957,8 @@ nvc0_screen_create(struct nouveau_device *dev)
MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index c26b98f..f99d533 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -573,6 +573,8 @@ nvc0_draw_arrays(struct nvc0_context *nvc0,
unsigned prim;
if (nvc0->state.index_bias) {
+ /* index_bias is implied 0 if !info->indexed (really ?) */
+ /* TODO: can we deactivate it for the VERTEX_BUFFER_FIRST command ? */
PUSH_SPACE(push, 1);
IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0);
nvc0->state.index_bias = 0;
@@ -794,6 +796,61 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
}
}
+static void
+nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(info->indirect);
+ unsigned size;
+ const uint32_t offset = buf->offset + info->indirect_offset;
+
+ /* must make FIFO wait for engines idle before continuing to process */
+ if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
+ IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0);
+
+ PUSH_SPACE(push, 8);
+ if (info->indexed) {
+ assert(nvc0->idxbuf.buffer);
+ assert(nouveau_resource_mapped_by_gpu(nvc0->idxbuf.buffer));
+ size = 5 * 4;
+ BEGIN_1IC0(push, NVC0_3D(MACRO_DRAW_ELEMENTS_INDIRECT), 1 + size / 4);
+ } else {
+ if (nvc0->state.index_bias) {
+ /* index_bias is implied 0 if !info->indexed (really ?) */
+ IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0);
+ nvc0->state.index_bias = 0;
+ }
+ size = 4 * 4;
+ BEGIN_1IC0(push, NVC0_3D(MACRO_DRAW_ARRAYS_INDIRECT), 1 + size / 4);
+ }
+ PUSH_DATA(push, nvc0_prim_gl(info->mode));
+#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
+ nouveau_pushbuf_space(push, 0, 0, 1);
+ nouveau_pushbuf_data(push,
+ buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | size);
+}
+
+static INLINE void
+nvc0_update_prim_restart(struct nvc0_context *nvc0, boolean en, uint32_t index)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ if (en != nvc0->state.prim_restart) {
+ if (en) {
+ BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, index);
+ } else {
+ IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
+ }
+ nvc0->state.prim_restart = en;
+ } else
+ if (en) {
+ BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_INDEX), 1);
+ PUSH_DATA (push, index);
+ }
+}
+
void
nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
@@ -885,42 +942,29 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
nvc0->base.vbo_dirty = TRUE;
+ nvc0_update_prim_restart(nvc0, info->primitive_restart, info->restart_index);
+
if (nvc0->base.vbo_dirty) {
if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
nvc0->base.vbo_dirty = FALSE;
}
+ if (unlikely(info->indirect)) {
+ nvc0_draw_indirect(nvc0, info);
+ } else
+ if (unlikely(info->count_from_stream_output)) {
+ nvc0_draw_stream_output(nvc0, info);
+ } else
if (info->indexed) {
boolean shorten = info->max_index <= 65535;
- if (info->primitive_restart != nvc0->state.prim_restart) {
- if (info->primitive_restart) {
- BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
- PUSH_DATA (push, 1);
- PUSH_DATA (push, info->restart_index);
-
- if (info->restart_index > 65535)
- shorten = FALSE;
- } else {
- IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
- }
- nvc0->state.prim_restart = info->primitive_restart;
- } else
- if (info->primitive_restart) {
- BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_INDEX), 1);
- PUSH_DATA (push, info->restart_index);
-
- if (info->restart_index > 65535)
- shorten = FALSE;
- }
+ if (info->primitive_restart && info->restart_index > 65535)
+ shorten = FALSE;
nvc0_draw_elements(nvc0, shorten,
info->mode, info->start, info->count,
info->instance_count, info->index_bias);
- } else
- if (unlikely(info->count_from_stream_output)) {
- nvc0_draw_stream_output(nvc0, info);
} else {
nvc0_draw_arrays(nvc0,
info->mode, info->start, info->count,
--
1.8.5.5
More information about the mesa-dev
mailing list