[Mesa-dev] [PATCH 2/4] gallium: separate indirect stuff from pipe_draw_info - 80 -> 56 bytes

Marek Olšák maraeo at gmail.com
Fri Apr 28 23:12:07 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

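Move the indirect-draw parameters out of pipe_draw_info into a separate
pipe_draw_indirect_info structure that pipe_draw_info references through
a pointer. This shrinks pipe_draw_info from 80 to 56 bytes, which makes
initialization of non-indirect draws faster.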
---
 src/gallium/auxiliary/util/u_draw.c          |  4 +-
 src/gallium/auxiliary/util/u_dump_state.c    | 15 ++++---
 src/gallium/auxiliary/util/u_vbuf.c          |  8 ++--
 src/gallium/docs/source/screen.rst           |  2 +-
 src/gallium/drivers/ddebug/dd_draw.c         | 42 ++++++++++-------
 src/gallium/drivers/ddebug/dd_pipe.h         |  7 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c  | 16 +++----
 src/gallium/drivers/r600/r600_state_common.c | 12 ++---
 src/gallium/drivers/radeonsi/si_state_draw.c | 59 +++++++++++++-----------
 src/gallium/drivers/trace/tr_dump_state.c    | 12 ++++-
 src/gallium/include/pipe/p_state.h           | 67 +++++++++++++++-------------
 src/gallium/state_trackers/nine/device9.c    |  1 -
 src/gallium/state_trackers/nine/nine_state.c |  1 -
 src/mesa/state_tracker/st_draw.c             | 19 ++++----
 14 files changed, 152 insertions(+), 113 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_draw.c b/src/gallium/auxiliary/util/u_draw.c
index ca78648..e7abbfc 100644
--- a/src/gallium/auxiliary/util/u_draw.c
+++ b/src/gallium/auxiliary/util/u_draw.c
@@ -138,22 +138,22 @@ util_draw_indirect(struct pipe_context *pipe,
    uint32_t *params;
    const unsigned num_params = info_in->indexed ? 5 : 4;
 
    assert(info_in->indirect);
    assert(!info_in->count_from_stream_output);
 
    memcpy(&info, info_in, sizeof(info));
 
    params = (uint32_t *)
       pipe_buffer_map_range(pipe,
-                            info_in->indirect,
-                            info_in->indirect_offset,
+                            info_in->indirect->buffer,
+                            info_in->indirect->offset,
                             num_params * sizeof(uint32_t),
                             PIPE_TRANSFER_READ,
                             &transfer);
    if (!transfer) {
       debug_printf("%s: failed to map indirect buffer\n", __FUNCTION__);
       return;
    }
 
    info.count = params[0];
    info.instance_count = params[1];
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index 0af81f7..9c32557 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -932,25 +932,30 @@ util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state)
 
    util_dump_member(stream, int,  state, index_bias);
    util_dump_member(stream, uint, state, min_index);
    util_dump_member(stream, uint, state, max_index);
 
    util_dump_member(stream, bool, state, primitive_restart);
    util_dump_member(stream, uint, state, restart_index);
 
    util_dump_member(stream, ptr, state, count_from_stream_output);
 
-   util_dump_member(stream, ptr, state, indirect);
-   util_dump_member(stream, uint, state, indirect_offset);
-   util_dump_member(stream, uint, state, indirect_stride);
-   util_dump_member(stream, uint, state, indirect_count);
-   util_dump_member(stream, uint, state, indirect_params_offset);
+   if (!state->indirect) {
+      util_dump_member(stream, ptr, state, indirect);
+   } else {
+      util_dump_member(stream, uint, state, indirect->offset);
+      util_dump_member(stream, uint, state, indirect->stride);
+      util_dump_member(stream, uint, state, indirect->draw_count);
+      util_dump_member(stream, uint, state, indirect->indirect_draw_count_offset);
+      util_dump_member(stream, ptr, state, indirect->buffer);
+      util_dump_member(stream, ptr, state, indirect->indirect_draw_count);
+   }
 
    util_dump_struct_end(stream);
 }
 
 void util_dump_box(FILE *stream, const struct pipe_box *box)
 {
    if (!box) {
       util_dump_null(stream);
       return;
    }
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index 62b88ac..9d6d529 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -1161,29 +1161,29 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
    }
 
    new_info = *info;
 
    /* Fallback. We need to know all the parameters. */
    if (new_info.indirect) {
       struct pipe_transfer *transfer = NULL;
       int *data;
 
       if (new_info.indexed) {
-         data = pipe_buffer_map_range(pipe, new_info.indirect,
-                                      new_info.indirect_offset, 20,
+         data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
+                                      new_info.indirect->offset, 20,
                                       PIPE_TRANSFER_READ, &transfer);
          new_info.index_bias = data[3];
          new_info.start_instance = data[4];
       }
       else {
-         data = pipe_buffer_map_range(pipe, new_info.indirect,
-                                      new_info.indirect_offset, 16,
+         data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
+                                      new_info.indirect->offset, 16,
                                       PIPE_TRANSFER_READ, &transfer);
          new_info.start_instance = data[3];
       }
 
       new_info.count = data[0];
       new_info.instance_count = data[1];
       new_info.start = data[2];
       pipe_buffer_unmap(pipe, transfer);
       new_info.indirect = NULL;
    }
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index de9de05..03a37f0 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -210,21 +210,21 @@ The integer capabilities:
 * ``PIPE_CAP_MAX_VERTEX_STREAMS``: The maximum number of vertex streams
   supported by the geometry shader. If stream-out is supported, this should be
   at least 1. If stream-out is not supported, this should be 0.
 * ``PIPE_CAP_DRAW_INDIRECT``: Whether the driver supports taking draw arguments
   { count, instance_count, start, index_bias } from a PIPE_BUFFER resource.
   See pipe_draw_info.
 * ``PIPE_CAP_MULTI_DRAW_INDIRECT``: Whether the driver supports
-  pipe_draw_info::indirect_stride and ::indirect_count
+  pipe_draw_indirect_info::stride and ::draw_count
 * ``PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS``: Whether the driver supports
   taking the number of indirect draws from a separate parameter
-  buffer, see pipe_draw_info::indirect_params.
+  buffer, see pipe_draw_indirect_info::indirect_draw_count.
 * ``PIPE_CAP_TGSI_FS_FINE_DERIVATIVE``: Whether the fragment shader supports
   the FINE versions of DDX/DDY.
 * ``PIPE_CAP_VENDOR_ID``: The vendor ID of the underlying hardware. If it's
   not available one should return 0xFFFFFFFF.
 * ``PIPE_CAP_DEVICE_ID``: The device ID (PCI ID) of the underlying hardware.
   0xFFFFFFFF if not available.
 * ``PIPE_CAP_ACCELERATED``: Whether the renderer is hardware accelerated.
 * ``PIPE_CAP_VIDEO_MEMORY``: The amount of video memory in megabytes.
 * ``PIPE_CAP_UMA``: If the device has a unified memory architecture or on-card
   memory and GART.
diff --git a/src/gallium/drivers/ddebug/dd_draw.c b/src/gallium/drivers/ddebug/dd_draw.c
index a70187d..7ffbb44 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -211,24 +211,26 @@ dd_dump_draw_vbo(struct dd_draw_state *dstate, struct pipe_draw_info *info, FILE
 
    DUMP(draw_info, info);
    if (info->indexed) {
       DUMP(index_buffer, &dstate->index_buffer);
       if (dstate->index_buffer.buffer)
          DUMP_M(resource, &dstate->index_buffer, buffer);
    }
    if (info->count_from_stream_output)
       DUMP_M(stream_output_target, info,
              count_from_stream_output);
-   if (info->indirect)
-      DUMP_M(resource, info, indirect);
-   if (info->indirect_params)
-      DUMP_M(resource, info, indirect_params);
+   if (info->indirect) {
+      DUMP_M(resource, info, indirect->buffer);
+      if (info->indirect->indirect_draw_count)
+         DUMP_M(resource, info, indirect->indirect_draw_count);
+   }
+
    fprintf(f, "\n");
 
    /* TODO: dump active queries */
 
    dd_dump_render_condition(dstate, f);
 
    for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
       if (dstate->vertex_buffers[i].buffer.resource) {
          DUMP_I(vertex_buffer, &dstate->vertex_buffers[i], i);
          if (!dstate->vertex_buffers[i].is_user_buffer)
@@ -481,21 +483,21 @@ dd_dump_driver_state(struct dd_context *dctx, FILE *f, unsigned flags)
 	   fprintf(f, "Driver-specific state:\n\n");
 	   dctx->pipe->dump_debug_state(dctx->pipe, f, flags);
    }
 }
 
 static void
 dd_dump_call(FILE *f, struct dd_draw_state *state, struct dd_call *call)
 {
    switch (call->type) {
    case CALL_DRAW_VBO:
-      dd_dump_draw_vbo(state, &call->info.draw_vbo, f);
+      dd_dump_draw_vbo(state, &call->info.draw_vbo.draw, f);
       break;
    case CALL_LAUNCH_GRID:
       dd_dump_launch_grid(state, &call->info.launch_grid, f);
       break;
    case CALL_RESOURCE_COPY_REGION:
       dd_dump_resource_copy_region(state,
                                    &call->info.resource_copy_region, f);
       break;
    case CALL_BLIT:
       dd_dump_blit(state, &call->info.blit, f);
@@ -600,23 +602,23 @@ dd_flush_and_handle_hang(struct dd_context *dctx,
       /* Terminate the process to prevent future hangs. */
       dd_kill_process();
    }
 }
 
 static void
 dd_unreference_copy_of_call(struct dd_call *dst)
 {
    switch (dst->type) {
    case CALL_DRAW_VBO:
-      pipe_so_target_reference(&dst->info.draw_vbo.count_from_stream_output, NULL);
-      pipe_resource_reference(&dst->info.draw_vbo.indirect, NULL);
-      pipe_resource_reference(&dst->info.draw_vbo.indirect_params, NULL);
+      pipe_so_target_reference(&dst->info.draw_vbo.draw.count_from_stream_output, NULL);
+      pipe_resource_reference(&dst->info.draw_vbo.indirect.buffer, NULL);
+      pipe_resource_reference(&dst->info.draw_vbo.indirect.indirect_draw_count, NULL);
       break;
    case CALL_LAUNCH_GRID:
       pipe_resource_reference(&dst->info.launch_grid.indirect, NULL);
       break;
    case CALL_RESOURCE_COPY_REGION:
       pipe_resource_reference(&dst->info.resource_copy_region.dst, NULL);
       pipe_resource_reference(&dst->info.resource_copy_region.src, NULL);
       break;
    case CALL_BLIT:
       pipe_resource_reference(&dst->info.blit.dst.resource, NULL);
@@ -642,27 +644,31 @@ dd_unreference_copy_of_call(struct dd_call *dst)
    }
 }
 
 static void
 dd_copy_call(struct dd_call *dst, struct dd_call *src)
 {
    dst->type = src->type;
 
    switch (src->type) {
    case CALL_DRAW_VBO:
-      pipe_so_target_reference(&dst->info.draw_vbo.count_from_stream_output,
-                               src->info.draw_vbo.count_from_stream_output);
-      pipe_resource_reference(&dst->info.draw_vbo.indirect,
-                              src->info.draw_vbo.indirect);
-      pipe_resource_reference(&dst->info.draw_vbo.indirect_params,
-                              src->info.draw_vbo.indirect_params);
+      pipe_so_target_reference(&dst->info.draw_vbo.draw.count_from_stream_output,
+                               src->info.draw_vbo.draw.count_from_stream_output);
+      pipe_resource_reference(&dst->info.draw_vbo.indirect.buffer,
+                              src->info.draw_vbo.indirect.buffer);
+      pipe_resource_reference(&dst->info.draw_vbo.indirect.indirect_draw_count,
+                              src->info.draw_vbo.indirect.indirect_draw_count);
       dst->info.draw_vbo = src->info.draw_vbo;
+      if (!src->info.draw_vbo.draw.indirect)
+         dst->info.draw_vbo.draw.indirect = NULL;
+      else
+         dst->info.draw_vbo.draw.indirect = &dst->info.draw_vbo.indirect;
       break;
    case CALL_LAUNCH_GRID:
       pipe_resource_reference(&dst->info.launch_grid.indirect,
                               src->info.launch_grid.indirect);
       dst->info.launch_grid = src->info.launch_grid;
       break;
    case CALL_RESOURCE_COPY_REGION:
       pipe_resource_reference(&dst->info.resource_copy_region.dst,
                               src->info.resource_copy_region.dst);
       pipe_resource_reference(&dst->info.resource_copy_region.src,
@@ -1154,21 +1160,27 @@ dd_after_draw(struct dd_context *dctx, struct dd_call *call)
 
 static void
 dd_context_draw_vbo(struct pipe_context *_pipe,
                     const struct pipe_draw_info *info)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
    struct dd_call call;
 
    call.type = CALL_DRAW_VBO;
-   call.info.draw_vbo = *info;
+   call.info.draw_vbo.draw = *info;
+   if (info->indirect) {
+      call.info.draw_vbo.indirect = *info->indirect;
+      call.info.draw_vbo.draw.indirect = &call.info.draw_vbo.indirect;
+   } else {
+      memset(&call.info.draw_vbo.indirect, 0, sizeof(*info->indirect));
+   }
 
    dd_before_draw(dctx);
    pipe->draw_vbo(pipe, info);
    dd_after_draw(dctx, &call);
 }
 
 static void
 dd_context_launch_grid(struct pipe_context *_pipe,
                        const struct pipe_grid_info *info)
 {
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h
index deae1f5..ea33193 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -97,26 +97,31 @@ struct call_clear_buffer
 
 struct call_generate_mipmap {
    struct pipe_resource *res;
    enum pipe_format format;
    unsigned base_level;
    unsigned last_level;
    unsigned first_layer;
    unsigned last_layer;
 };
 
+struct call_draw_info {
+   struct pipe_draw_info draw;
+   struct pipe_draw_indirect_info indirect;
+};
+
 struct dd_call
 {
    enum call_type type;
 
    union {
-      struct pipe_draw_info draw_vbo;
+      struct call_draw_info draw_vbo;
       struct pipe_grid_info launch_grid;
       struct call_resource_copy_region resource_copy_region;
       struct pipe_blit_info blit;
       struct pipe_resource *flush_resource;
       struct call_clear clear;
       struct call_clear_buffer clear_buffer;
       struct call_generate_mipmap generate_mipmap;
    } info;
 };
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index b42b468..7cea5fb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -811,24 +811,24 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
       IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
 
       mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
    }
 }
 
 static void
 nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
-   struct nv04_resource *buf = nv04_resource(info->indirect);
-   struct nv04_resource *buf_count = nv04_resource(info->indirect_params);
-   unsigned size, macro, count = info->indirect_count, drawid = info->drawid;
-   uint32_t offset = buf->offset + info->indirect_offset;
+   struct nv04_resource *buf = nv04_resource(info->indirect->buffer);
+   struct nv04_resource *buf_count = nv04_resource(info->indirect->indirect_draw_count);
+   unsigned size, macro, count = info->indirect->draw_count, drawid = info->drawid;
+   uint32_t offset = buf->offset + info->indirect->offset;
    struct nvc0_screen *screen = nvc0->screen;
 
    PUSH_SPACE(push, 7);
 
    /* must make FIFO wait for engines idle before continuing to process */
    if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) ||
        (buf_count && buf_count->fence_wr &&
         !nouveau_fence_signalled(buf_count->fence_wr))) {
       IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0);
    }
@@ -863,54 +863,54 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
          macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT;
    }
 
    /* If the stride is not the natural stride, we have to stick a separate
     * push data reference for each draw. Otherwise it can all go in as one.
     * Of course there is a maximum packet size, so we have to break things up
     * along those borders as well.
     */
    while (count) {
       unsigned draws = count, pushes, i;
-      if (info->indirect_stride == size * 4) {
+      if (info->indirect->stride == size * 4) {
          draws = MIN2(draws, (NV04_PFIFO_MAX_PACKET_LEN - 4) / size);
          pushes = 1;
       } else {
          draws = MIN2(draws, 32);
          pushes = draws;
       }
 
       nouveau_pushbuf_space(push, 16, 0, pushes + !!buf_count);
       PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
       if (buf_count)
          PUSH_REFN(push, buf_count->bo, NOUVEAU_BO_RD | buf_count->domain);
       PUSH_DATA(push,
                 NVC0_FIFO_PKHDR_1I(0, macro, 3 + !!buf_count + draws * size));
       PUSH_DATA(push, nvc0_prim_gl(info->mode));
       PUSH_DATA(push, drawid);
       PUSH_DATA(push, draws);
       if (buf_count) {
          nouveau_pushbuf_data(push,
                               buf_count->bo,
-                              buf_count->offset + info->indirect_params_offset,
+                              buf_count->offset + info->indirect->indirect_draw_count_offset,
                               NVC0_IB_ENTRY_1_NO_PREFETCH | 4);
       }
       if (pushes == 1) {
          nouveau_pushbuf_data(push,
                               buf->bo, offset,
                               NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4 * draws));
-         offset += draws * info->indirect_stride;
+         offset += draws * info->indirect->stride;
       } else {
          for (i = 0; i < pushes; i++) {
             nouveau_pushbuf_data(push,
                                  buf->bo, offset,
                                  NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4));
-            offset += info->indirect_stride;
+            offset += info->indirect->stride;
          }
       }
       count -= draws;
       drawid += draws;
    }
 }
 
 static inline void
 nvc0_update_prim_restart(struct nvc0_context *nvc0, bool en, uint32_t index)
 {
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index f3011c8..ee6fd26 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1763,25 +1763,25 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 			unsigned out_offset;
 			void *ptr;
 			unsigned start, count;
 
 			if (likely(!info->indirect)) {
 				start = 0;
 				count = info->count;
 			}
 			else {
 				/* Have to get start/count from indirect buffer, slow path ahead... */
-				struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect;
+				struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect->buffer;
 				unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource,
 					PIPE_TRANSFER_READ);
 				if (data) {
-					data += info->indirect_offset / sizeof(unsigned);
+					data += info->indirect->offset / sizeof(unsigned);
 					start = data[2] * ib.index_size;
 					count = data[0];
 				}
 				else {
 					start = 0;
 					count = 0;
 				}
 			}
 
 			u_upload_alloc(ctx->stream_uploader, start, count * 2,
@@ -1911,35 +1911,35 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 				      r600_conv_pipe_prim(info->mode));
 
 		rctx->last_primitive_type = info->mode;
 	}
 
 	/* Draw packets. */
 	if (likely(!info->indirect)) {
 		radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
 		radeon_emit(cs, info->instance_count);
 	} else {
-		uint64_t va = r600_resource(info->indirect)->gpu_address;
+		uint64_t va = r600_resource(info->indirect->buffer)->gpu_address;
 		assert(rctx->b.chip_class >= EVERGREEN);
 
 		// Invalidate so non-indirect draw calls reset this state
 		rctx->vgt_state.last_draw_was_indirect = true;
 		rctx->last_start_instance = -1;
 
 		radeon_emit(cs, PKT3(EG_PKT3_SET_BASE, 2, 0));
 		radeon_emit(cs, EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE);
 		radeon_emit(cs, va);
 		radeon_emit(cs, (va >> 32UL) & 0xFF);
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
-							  (struct r600_resource*)info->indirect,
+							  (struct r600_resource*)info->indirect->buffer,
 							  RADEON_USAGE_READ,
                                                           RADEON_PRIO_DRAW_INDIRECT));
 	}
 
 	if (info->indexed) {
 		radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
 		radeon_emit(cs, ib.index_size == 4 ?
 				(VGT_INDEX_32 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_32_BIT : 0)) :
 				(VGT_INDEX_16 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_16_BIT : 0)));
 
@@ -1975,21 +1975,21 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 				radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 				radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 									  (struct r600_resource*)ib.buffer,
 									  RADEON_USAGE_READ,
                                                                           RADEON_PRIO_INDEX_BUFFER));
 
 				radeon_emit(cs, PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, 0));
 				radeon_emit(cs, max_size);
 
 				radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit));
-				radeon_emit(cs, info->indirect_offset);
+				radeon_emit(cs, info->indirect->offset);
 				radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA);
 			}
 		}
 	} else {
 		if (unlikely(info->count_from_stream_output)) {
 			struct r600_so_target *t = (struct r600_so_target*)info->count_from_stream_output;
 			uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset;
 
 			radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
 
@@ -2005,21 +2005,21 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 								  t->buf_filled_size, RADEON_USAGE_READ,
 								  RADEON_PRIO_SO_FILLED_SIZE));
 		}
 
 		if (likely(!info->indirect)) {
 			radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
 			radeon_emit(cs, info->count);
 		}
 		else {
 			radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit));
-			radeon_emit(cs, info->indirect_offset);
+			radeon_emit(cs, info->indirect->offset);
 		}
 		radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
 				(info->count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0));
 	}
 
 	/* SMX returns CONTEXT_DONE too early workaround */
 	if (rctx->b.family == CHIP_R600 ||
 	    rctx->b.family == CHIP_RV610 ||
 	    rctx->b.family == CHIP_RV630 ||
 	    rctx->b.family == CHIP_RV635) {
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e6a9ee0..70b6ed8 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -603,20 +603,21 @@ static void si_emit_draw_registers(struct si_context *sctx,
 		radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
 				       info->restart_index);
 		sctx->last_restart_index = info->restart_index;
 	}
 }
 
 static void si_emit_draw_packets(struct si_context *sctx,
 				 const struct pipe_draw_info *info,
 				 const struct pipe_index_buffer *ib)
 {
+	struct pipe_draw_indirect_info *indirect = info->indirect;
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
 	bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
 	uint32_t index_max_size = 0;
 	uint64_t index_va = 0;
 
 	if (info->count_from_stream_output) {
 		struct r600_so_target *t =
 			(struct r600_so_target*)info->count_from_stream_output;
 		uint64_t va = t->buf_filled_size->gpu_address +
@@ -683,85 +684,85 @@ static void si_emit_draw_packets(struct si_context *sctx,
 				      (struct r600_resource *)ib->buffer,
 				      RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
 	} else {
 		/* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE,
 		 * so the state must be re-emitted before the next indexed draw.
 		 */
 		if (sctx->b.chip_class >= CIK)
 			sctx->last_index_size = -1;
 	}
 
-	if (info->indirect) {
-		uint64_t indirect_va = r600_resource(info->indirect)->gpu_address;
+	if (indirect) {
+		uint64_t indirect_va = r600_resource(indirect->buffer)->gpu_address;
 
 		assert(indirect_va % 8 == 0);
 
 		si_invalidate_draw_sh_constants(sctx);
 
 		radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
 		radeon_emit(cs, 1);
 		radeon_emit(cs, indirect_va);
 		radeon_emit(cs, indirect_va >> 32);
 
 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-				      (struct r600_resource *)info->indirect,
+				      (struct r600_resource *)indirect->buffer,
 				      RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
 
 		unsigned di_src_sel = info->indexed ? V_0287F0_DI_SRC_SEL_DMA
 						    : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
 
-		assert(info->indirect_offset % 4 == 0);
+		assert(indirect->offset % 4 == 0);
 
 		if (info->indexed) {
 			radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
 			radeon_emit(cs, index_va);
 			radeon_emit(cs, index_va >> 32);
 
 			radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
 			radeon_emit(cs, index_max_size);
 		}
 
 		if (!sctx->screen->has_draw_indirect_multi) {
 			radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT
 							   : PKT3_DRAW_INDIRECT,
 					     3, render_cond_bit));
-			radeon_emit(cs, info->indirect_offset);
+			radeon_emit(cs, indirect->offset);
 			radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
 			radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
 			radeon_emit(cs, di_src_sel);
 		} else {
 			uint64_t count_va = 0;
 
-			if (info->indirect_params) {
+			if (indirect->indirect_draw_count) {
 				struct r600_resource *params_buf =
-					(struct r600_resource *)info->indirect_params;
+					(struct r600_resource *)indirect->indirect_draw_count;
 
 				radeon_add_to_buffer_list(
 					&sctx->b, &sctx->b.gfx, params_buf,
 					RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
 
-				count_va = params_buf->gpu_address + info->indirect_params_offset;
+				count_va = params_buf->gpu_address + indirect->indirect_draw_count_offset;
 			}
 
 			radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
 							     PKT3_DRAW_INDIRECT_MULTI,
 					     8, render_cond_bit));
-			radeon_emit(cs, info->indirect_offset);
+			radeon_emit(cs, indirect->offset);
 			radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
 			radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
 			radeon_emit(cs, ((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) |
 					S_2C3_DRAW_INDEX_ENABLE(1) |
-					S_2C3_COUNT_INDIRECT_ENABLE(!!info->indirect_params));
-			radeon_emit(cs, info->indirect_count);
+					S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
+			radeon_emit(cs, indirect->draw_count);
 			radeon_emit(cs, count_va);
 			radeon_emit(cs, count_va >> 32);
-			radeon_emit(cs, info->indirect_stride);
+			radeon_emit(cs, indirect->stride);
 			radeon_emit(cs, di_src_sel);
 		}
 	} else {
 		int base_vertex;
 
 		radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
 		radeon_emit(cs, info->instance_count);
 
 		/* Base vertex and start instance. */
 		base_vertex = info->indexed ? info->index_bias : info->start;
@@ -1048,64 +1049,66 @@ void si_emit_cache_flush(struct si_context *sctx)
 			        EVENT_INDEX(0));
 	}
 
 	rctx->flags = 0;
 }
 
 static void si_get_draw_start_count(struct si_context *sctx,
 				    const struct pipe_draw_info *info,
 				    unsigned *start, unsigned *count)
 {
-	if (info->indirect) {
+	struct pipe_draw_indirect_info *indirect = info->indirect;
+
+	if (indirect) {
 		unsigned indirect_count;
 		struct pipe_transfer *transfer;
 		unsigned begin, end;
 		unsigned map_size;
 		unsigned *data;
 
-		if (info->indirect_params) {
+		if (indirect->indirect_draw_count) {
 			data = pipe_buffer_map_range(&sctx->b.b,
-					info->indirect_params,
-					info->indirect_params_offset,
+					indirect->indirect_draw_count,
+					indirect->indirect_draw_count_offset,
 					sizeof(unsigned),
 					PIPE_TRANSFER_READ, &transfer);
 
 			indirect_count = *data;
 
 			pipe_buffer_unmap(&sctx->b.b, transfer);
 		} else {
-			indirect_count = info->indirect_count;
+			indirect_count = indirect->draw_count;
 		}
 
 		if (!indirect_count) {
 			*start = *count = 0;
 			return;
 		}
 
-		map_size = (indirect_count - 1) * info->indirect_stride + 3 * sizeof(unsigned);
-		data = pipe_buffer_map_range(&sctx->b.b, info->indirect,
-					     info->indirect_offset, map_size,
+		map_size = (indirect_count - 1) * indirect->stride + 3 * sizeof(unsigned);
+		data = pipe_buffer_map_range(&sctx->b.b, indirect->buffer,
+					     indirect->offset, map_size,
 					     PIPE_TRANSFER_READ, &transfer);
 
 		begin = UINT_MAX;
 		end = 0;
 
 		for (unsigned i = 0; i < indirect_count; ++i) {
 			unsigned count = data[0];
 			unsigned start = data[2];
 
 			if (count > 0) {
 				begin = MIN2(begin, start);
 				end = MAX2(end, start + count);
 			}
 
-			data += info->indirect_stride / sizeof(unsigned);
+			data += indirect->stride / sizeof(unsigned);
 		}
 
 		pipe_buffer_unmap(&sctx->b.b, transfer);
 
 		if (begin < end) {
 			*start = begin;
 			*count = end - begin;
 		} else {
 			*start = *count = 0;
 		}
@@ -1277,32 +1280,34 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		} else if (sctx->b.chip_class <= CIK &&
 			   r600_resource(ib->buffer)->TC_L2_dirty) {
 			/* VI reads index buffers through TC L2, so it doesn't
 			 * need this. */
 			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 			r600_resource(ib->buffer)->TC_L2_dirty = false;
 		}
 	}
 
 	if (info->indirect) {
+		struct pipe_draw_indirect_info *indirect = info->indirect;
+
 		/* Add the buffer size for memory checking in need_cs_space. */
-		r600_context_add_resource_size(ctx, info->indirect);
+		r600_context_add_resource_size(ctx, indirect->buffer);
 
-		if (r600_resource(info->indirect)->TC_L2_dirty) {
+		if (r600_resource(indirect->buffer)->TC_L2_dirty) {
 			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
-			r600_resource(info->indirect)->TC_L2_dirty = false;
+			r600_resource(indirect->buffer)->TC_L2_dirty = false;
 		}
 
-		if (info->indirect_params &&
-		    r600_resource(info->indirect_params)->TC_L2_dirty) {
+		if (indirect->indirect_draw_count &&
+		    r600_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
 			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
-			r600_resource(info->indirect_params)->TC_L2_dirty = false;
+			r600_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
 		}
 	}
 
 	si_need_cs_space(sctx);
 
 	/* Since we've called r600_context_add_resource_size for vertex buffers,
 	 * this must be called after si_need_cs_space, because we must let
 	 * need_cs_space flush before we add buffers to the buffer list.
 	 */
 	if (!si_upload_vertex_buffer_descriptors(sctx))
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 13c0a9d..e4a5e3b 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -805,22 +805,30 @@ void trace_dump_draw_info(const struct pipe_draw_info *state)
 
    trace_dump_member(int,  state, index_bias);
    trace_dump_member(uint, state, min_index);
    trace_dump_member(uint, state, max_index);
 
    trace_dump_member(bool, state, primitive_restart);
    trace_dump_member(uint, state, restart_index);
 
    trace_dump_member(ptr, state, count_from_stream_output);
 
-   trace_dump_member(ptr, state, indirect);
-   trace_dump_member(uint, state, indirect_offset);
+   if (!state->indirect) {
+      trace_dump_member(ptr, state, indirect);
+   } else {
+      trace_dump_member(uint, state, indirect->offset);
+      trace_dump_member(uint, state, indirect->stride);
+      trace_dump_member(uint, state, indirect->draw_count);
+      trace_dump_member(uint, state, indirect->indirect_draw_count_offset);
+      trace_dump_member(ptr, state, indirect->buffer);
+      trace_dump_member(ptr, state, indirect->indirect_draw_count);
+   }
 
    trace_dump_struct_end();
 }
 
 void trace_dump_blit_info(const struct pipe_blit_info *info)
 {
    char mask[7];
 
    if (!trace_dumping_enabled_locked())
       return;
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 0c9b4b4..3cfdd34 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -634,20 +634,54 @@ struct pipe_vertex_element
  */
 struct pipe_index_buffer
 {
    unsigned index_size;  /**< size of an index, in bytes */
    unsigned offset;  /**< offset to start of data in buffer, in bytes */
    struct pipe_resource *buffer; /**< the actual buffer */
    const void *user_buffer;  /**< pointer to a user buffer if buffer == NULL */
 };
 
 
+struct pipe_draw_indirect_info
+{
+   unsigned offset; /**< must be 4 byte aligned */
+   unsigned stride; /**< must be 4 byte aligned */
+   unsigned draw_count; /**< number of indirect draws */
+   unsigned indirect_draw_count_offset; /**< must be 4 byte aligned */
+
+   /* Indirect draw parameters resource is laid out as follows:
+    *
+    * if indexed is TRUE:
+    *  struct {
+    *     uint32_t count;
+    *     uint32_t instance_count;
+    *     uint32_t start;
+    *     int32_t index_bias;
+    *     uint32_t start_instance;
+    *  };
+    * otherwise:
+    *  struct {
+    *     uint32_t count;
+    *     uint32_t instance_count;
+    *     uint32_t start;
+    *     uint32_t start_instance;
+    *  };
+    */
+   struct pipe_resource *buffer;
+
+   /* Indirect draw count resource: If not NULL, contains a 32-bit value which
+    * is to be used as the real draw_count; draw_count is then the maximum.
+    */
+   struct pipe_resource *indirect_draw_count;
+};
+
+
 /**
  * Information to describe a draw_vbo call.
  */
 struct pipe_draw_info
 {
    boolean indexed;  /**< use index buffer */
    enum pipe_prim_type mode:8;  /**< the mode of the primitive */
    boolean primitive_restart;
    ubyte vertices_per_patch; /**< the number of vertices per patch */
 
@@ -664,54 +698,23 @@ struct pipe_draw_info
     */
    int index_bias; /**< a bias to be added to each index */
    unsigned min_index; /**< the min index */
    unsigned max_index; /**< the max index */
 
    /**
     * Primitive restart enable/index (only applies to indexed drawing)
     */
    unsigned restart_index;
 
-   unsigned indirect_offset; /**< must be 4 byte aligned */
-   unsigned indirect_stride; /**< must be 4 byte aligned */
-   unsigned indirect_count; /**< number of indirect draws */
-
-   unsigned indirect_params_offset; /**< must be 4 byte aligned */
-
    /* Pointers must be at the end for an optimal structure layout on 64-bit. */
 
-   /* Indirect draw parameters resource: If not NULL, most values are taken
-    * from this buffer instead, which is laid out as follows:
-    *
-    * if indexed is TRUE:
-    *  struct {
-    *     uint32_t count;
-    *     uint32_t instance_count;
-    *     uint32_t start;
-    *     int32_t index_bias;
-    *     uint32_t start_instance;
-    *  };
-    * otherwise:
-    *  struct {
-    *     uint32_t count;
-    *     uint32_t instance_count;
-    *     uint32_t start;
-    *     uint32_t start_instance;
-    *  };
-    */
-   struct pipe_resource *indirect;
-
-   /* Indirect draw count resource: If not NULL, contains a 32-bit value which
-    * is to be used as the real indirect_count. In that case indirect_count
-    * becomes the maximum possible value.
-    */
-   struct pipe_resource *indirect_params;
+   struct pipe_draw_indirect_info *indirect; /**< Indirect draw; NULL if direct. */
 
    /**
     * Stream output target. If not NULL, it's used to provide the 'count'
     * parameter based on the number vertices captured by the stream output
     * stage. (or generally, based on the number of bytes captured)
     *
     * Only 'mode', 'start_instance', and 'instance_count' are taken into
     * account, all the other variables from pipe_draw_info are ignored.
     *
     * 'start' is implicitly 0 and 'count' is set as discussed above.
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 6f97ddd..6390735 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -3024,21 +3024,20 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
         return D3DERR_DRIVERINTERNALERROR;
     }
 
     draw.mode = PIPE_PRIM_POINTS;
     draw.count = VertexCount;
     draw.start_instance = 0;
     draw.primitive_restart = FALSE;
     draw.restart_index = 0;
     draw.count_from_stream_output = NULL;
     draw.indirect = NULL;
-    draw.indirect_params = NULL;
     draw.instance_count = 1;
     draw.indexed = FALSE;
     draw.start = 0;
     draw.index_bias = 0;
     draw.min_index = 0;
     draw.max_index = VertexCount - 1;
 
 
     pipe_sw->set_stream_output_targets(pipe_sw, 1, &target, offsets);
 
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 2046d9d..3b1cd7c 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -2552,21 +2552,20 @@ init_draw_info(struct pipe_draw_info *info,
     info->mode = d3dprimitivetype_to_pipe_prim(type);
     info->count = prim_count_to_vertex_count(type, count);
     info->start_instance = 0;
     info->instance_count = 1;
     if (dev->context.stream_instancedata_mask & dev->context.stream_usage_mask)
         info->instance_count = MAX2(dev->context.stream_freq[0] & 0x7FFFFF, 1);
     info->primitive_restart = FALSE;
     info->restart_index = 0;
     info->count_from_stream_output = NULL;
     info->indirect = NULL;
-    info->indirect_params = NULL;
 }
 
 CSMT_ITEM_NO_WAIT(nine_context_draw_primitive,
                   ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
                   ARG_VAL(UINT, StartVertex),
                   ARG_VAL(UINT, PrimitiveCount))
 {
     struct nine_context *context = &device->context;
     struct pipe_draw_info info;
 
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 5c9f7ea..29381b6 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -257,76 +257,79 @@ st_indirect_draw_vbo(struct gl_context *ctx,
                      struct gl_buffer_object *indirect_data,
                      GLsizeiptr indirect_offset,
                      unsigned draw_count,
                      unsigned stride,
                      struct gl_buffer_object *indirect_params,
                      GLsizeiptr indirect_params_offset,
                      const struct _mesa_index_buffer *ib)
 {
    struct st_context *st = st_context(ctx);
    struct pipe_draw_info info;
+   struct pipe_draw_indirect_info indirect;
 
    /* Mesa core state should have been validated already */
    assert(ctx->NewState == 0x0);
    assert(stride);
 
    st_invalidate_readpix_cache(st);
 
    /* Validate state. */
    if ((st->dirty | ctx->NewDriverState) & ST_PIPELINE_RENDER_STATE_MASK ||
        st->gfx_shaders_may_be_dirty) {
       st_validate_state(st, ST_PIPELINE_RENDER);
    }
 
    if (st->vertex_array_out_of_memory) {
       return;
    }
 
+   memset(&indirect, 0, sizeof(indirect));
    util_draw_init_info(&info);
 
    if (ib) {
       setup_index_buffer(st, ib);
 
       info.indexed = TRUE;
 
       /* Primitive restart is not handled by the VBO module in this case. */
       setup_primitive_restart(ctx, &info, ib->index_size);
    }
 
    info.mode = translate_prim(ctx, mode);
    info.vertices_per_patch = ctx->TessCtrlProgram.patch_vertices;
-   info.indirect = st_buffer_object(indirect_data)->buffer;
-   info.indirect_offset = indirect_offset;
+   info.indirect = &indirect;
+   indirect.buffer = st_buffer_object(indirect_data)->buffer;
+   indirect.offset = indirect_offset;
 
    if (ST_DEBUG & DEBUG_DRAW) {
       debug_printf("st/draw indirect: mode %s drawcount %d indexed %d\n",
                    u_prim_name(info.mode),
                    draw_count,
                    info.indexed);
    }
 
    if (!st->has_multi_draw_indirect) {
       int i;
 
       assert(!indirect_params);
-      info.indirect_count = 1;
+      indirect.draw_count = 1;
       for (i = 0; i < draw_count; i++) {
          info.drawid = i;
          cso_draw_vbo(st->cso_context, &info);
-         info.indirect_offset += stride;
+         indirect.offset += stride;
       }
    } else {
-      info.indirect_count = draw_count;
-      info.indirect_stride = stride;
+      indirect.draw_count = draw_count;
+      indirect.stride = stride;
       if (indirect_params) {
-         info.indirect_params = st_buffer_object(indirect_params)->buffer;
-         info.indirect_params_offset = indirect_params_offset;
+         indirect.indirect_draw_count = st_buffer_object(indirect_params)->buffer;
+         indirect.indirect_draw_count_offset = indirect_params_offset;
       }
       cso_draw_vbo(st->cso_context, &info);
    }
 }
 
 
 void
 st_init_draw(struct st_context *st)
 {
    struct gl_context *ctx = st->ctx;
-- 
2.7.4



More information about the mesa-dev mailing list