[Mesa-dev] [PATCH 2/4] gallium: separate indirect stuff from pipe_draw_info - 80 -> 56 bytes
Marek Olšák
maraeo at gmail.com
Fri Apr 28 23:12:07 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
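Move the indirect-draw parameters out of pipe_draw_info into a separate pipe_draw_indirect_info structure referenced through a pointer, shrinking pipe_draw_info from 80 to 56 bytes for faster initialization of non-indirect draws.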
---
src/gallium/auxiliary/util/u_draw.c | 4 +-
src/gallium/auxiliary/util/u_dump_state.c | 15 ++++---
src/gallium/auxiliary/util/u_vbuf.c | 8 ++--
src/gallium/docs/source/screen.rst | 2 +-
src/gallium/drivers/ddebug/dd_draw.c | 42 ++++++++++-------
src/gallium/drivers/ddebug/dd_pipe.h | 7 ++-
src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 16 +++----
src/gallium/drivers/r600/r600_state_common.c | 12 ++---
src/gallium/drivers/radeonsi/si_state_draw.c | 59 +++++++++++++-----------
src/gallium/drivers/trace/tr_dump_state.c | 12 ++++-
src/gallium/include/pipe/p_state.h | 67 +++++++++++++++-------------
src/gallium/state_trackers/nine/device9.c | 1 -
src/gallium/state_trackers/nine/nine_state.c | 1 -
src/mesa/state_tracker/st_draw.c | 19 ++++----
14 files changed, 152 insertions(+), 113 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_draw.c b/src/gallium/auxiliary/util/u_draw.c
index ca78648..e7abbfc 100644
--- a/src/gallium/auxiliary/util/u_draw.c
+++ b/src/gallium/auxiliary/util/u_draw.c
@@ -138,22 +138,22 @@ util_draw_indirect(struct pipe_context *pipe,
uint32_t *params;
const unsigned num_params = info_in->indexed ? 5 : 4;
assert(info_in->indirect);
assert(!info_in->count_from_stream_output);
memcpy(&info, info_in, sizeof(info));
params = (uint32_t *)
pipe_buffer_map_range(pipe,
- info_in->indirect,
- info_in->indirect_offset,
+ info_in->indirect->buffer,
+ info_in->indirect->offset,
num_params * sizeof(uint32_t),
PIPE_TRANSFER_READ,
&transfer);
if (!transfer) {
debug_printf("%s: failed to map indirect buffer\n", __FUNCTION__);
return;
}
info.count = params[0];
info.instance_count = params[1];
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index 0af81f7..9c32557 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -932,25 +932,30 @@ util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state)
util_dump_member(stream, int, state, index_bias);
util_dump_member(stream, uint, state, min_index);
util_dump_member(stream, uint, state, max_index);
util_dump_member(stream, bool, state, primitive_restart);
util_dump_member(stream, uint, state, restart_index);
util_dump_member(stream, ptr, state, count_from_stream_output);
- util_dump_member(stream, ptr, state, indirect);
- util_dump_member(stream, uint, state, indirect_offset);
- util_dump_member(stream, uint, state, indirect_stride);
- util_dump_member(stream, uint, state, indirect_count);
- util_dump_member(stream, uint, state, indirect_params_offset);
+ if (!state->indirect) {
+ util_dump_member(stream, ptr, state, indirect);
+ } else {
+ util_dump_member(stream, uint, state, indirect->offset);
+ util_dump_member(stream, uint, state, indirect->stride);
+ util_dump_member(stream, uint, state, indirect->draw_count);
+ util_dump_member(stream, uint, state, indirect->indirect_draw_count_offset);
+ util_dump_member(stream, ptr, state, indirect->buffer);
+ util_dump_member(stream, ptr, state, indirect->indirect_draw_count);
+ }
util_dump_struct_end(stream);
}
void util_dump_box(FILE *stream, const struct pipe_box *box)
{
if (!box) {
util_dump_null(stream);
return;
}
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index 62b88ac..9d6d529 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -1161,29 +1161,29 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
}
new_info = *info;
/* Fallback. We need to know all the parameters. */
if (new_info.indirect) {
struct pipe_transfer *transfer = NULL;
int *data;
if (new_info.indexed) {
- data = pipe_buffer_map_range(pipe, new_info.indirect,
- new_info.indirect_offset, 20,
+ data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
+ new_info.indirect->offset, 20,
PIPE_TRANSFER_READ, &transfer);
new_info.index_bias = data[3];
new_info.start_instance = data[4];
}
else {
- data = pipe_buffer_map_range(pipe, new_info.indirect,
- new_info.indirect_offset, 16,
+ data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
+ new_info.indirect->offset, 16,
PIPE_TRANSFER_READ, &transfer);
new_info.start_instance = data[3];
}
new_info.count = data[0];
new_info.instance_count = data[1];
new_info.start = data[2];
pipe_buffer_unmap(pipe, transfer);
new_info.indirect = NULL;
}
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index de9de05..03a37f0 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -210,21 +210,21 @@ The integer capabilities:
* ``PIPE_CAP_MAX_VERTEX_STREAMS``: The maximum number of vertex streams
supported by the geometry shader. If stream-out is supported, this should be
at least 1. If stream-out is not supported, this should be 0.
* ``PIPE_CAP_DRAW_INDIRECT``: Whether the driver supports taking draw arguments
{ count, instance_count, start, index_bias } from a PIPE_BUFFER resource.
See pipe_draw_info.
* ``PIPE_CAP_MULTI_DRAW_INDIRECT``: Whether the driver supports
pipe_draw_info::indirect_stride and ::indirect_count
* ``PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS``: Whether the driver supports
taking the number of indirect draws from a separate parameter
- buffer, see pipe_draw_info::indirect_params.
+ buffer, see pipe_draw_indirect_info::indirect_draw_count.
* ``PIPE_CAP_TGSI_FS_FINE_DERIVATIVE``: Whether the fragment shader supports
the FINE versions of DDX/DDY.
* ``PIPE_CAP_VENDOR_ID``: The vendor ID of the underlying hardware. If it's
not available one should return 0xFFFFFFFF.
* ``PIPE_CAP_DEVICE_ID``: The device ID (PCI ID) of the underlying hardware.
0xFFFFFFFF if not available.
* ``PIPE_CAP_ACCELERATED``: Whether the renderer is hardware accelerated.
* ``PIPE_CAP_VIDEO_MEMORY``: The amount of video memory in megabytes.
* ``PIPE_CAP_UMA``: If the device has a unified memory architecture or on-card
memory and GART.
diff --git a/src/gallium/drivers/ddebug/dd_draw.c b/src/gallium/drivers/ddebug/dd_draw.c
index a70187d..7ffbb44 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -211,24 +211,26 @@ dd_dump_draw_vbo(struct dd_draw_state *dstate, struct pipe_draw_info *info, FILE
DUMP(draw_info, info);
if (info->indexed) {
DUMP(index_buffer, &dstate->index_buffer);
if (dstate->index_buffer.buffer)
DUMP_M(resource, &dstate->index_buffer, buffer);
}
if (info->count_from_stream_output)
DUMP_M(stream_output_target, info,
count_from_stream_output);
- if (info->indirect)
- DUMP_M(resource, info, indirect);
- if (info->indirect_params)
- DUMP_M(resource, info, indirect_params);
+ if (info->indirect) {
+ DUMP_M(resource, info, indirect->buffer);
+ if (info->indirect->indirect_draw_count)
+ DUMP_M(resource, info, indirect->indirect_draw_count);
+ }
+
fprintf(f, "\n");
/* TODO: dump active queries */
dd_dump_render_condition(dstate, f);
for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
if (dstate->vertex_buffers[i].buffer.resource) {
DUMP_I(vertex_buffer, &dstate->vertex_buffers[i], i);
if (!dstate->vertex_buffers[i].is_user_buffer)
@@ -481,21 +483,21 @@ dd_dump_driver_state(struct dd_context *dctx, FILE *f, unsigned flags)
fprintf(f, "Driver-specific state:\n\n");
dctx->pipe->dump_debug_state(dctx->pipe, f, flags);
}
}
static void
dd_dump_call(FILE *f, struct dd_draw_state *state, struct dd_call *call)
{
switch (call->type) {
case CALL_DRAW_VBO:
- dd_dump_draw_vbo(state, &call->info.draw_vbo, f);
+ dd_dump_draw_vbo(state, &call->info.draw_vbo.draw, f);
break;
case CALL_LAUNCH_GRID:
dd_dump_launch_grid(state, &call->info.launch_grid, f);
break;
case CALL_RESOURCE_COPY_REGION:
dd_dump_resource_copy_region(state,
&call->info.resource_copy_region, f);
break;
case CALL_BLIT:
dd_dump_blit(state, &call->info.blit, f);
@@ -600,23 +602,23 @@ dd_flush_and_handle_hang(struct dd_context *dctx,
/* Terminate the process to prevent future hangs. */
dd_kill_process();
}
}
static void
dd_unreference_copy_of_call(struct dd_call *dst)
{
switch (dst->type) {
case CALL_DRAW_VBO:
- pipe_so_target_reference(&dst->info.draw_vbo.count_from_stream_output, NULL);
- pipe_resource_reference(&dst->info.draw_vbo.indirect, NULL);
- pipe_resource_reference(&dst->info.draw_vbo.indirect_params, NULL);
+ pipe_so_target_reference(&dst->info.draw_vbo.draw.count_from_stream_output, NULL);
+ pipe_resource_reference(&dst->info.draw_vbo.indirect.buffer, NULL);
+ pipe_resource_reference(&dst->info.draw_vbo.indirect.indirect_draw_count, NULL);
break;
case CALL_LAUNCH_GRID:
pipe_resource_reference(&dst->info.launch_grid.indirect, NULL);
break;
case CALL_RESOURCE_COPY_REGION:
pipe_resource_reference(&dst->info.resource_copy_region.dst, NULL);
pipe_resource_reference(&dst->info.resource_copy_region.src, NULL);
break;
case CALL_BLIT:
pipe_resource_reference(&dst->info.blit.dst.resource, NULL);
@@ -642,27 +644,31 @@ dd_unreference_copy_of_call(struct dd_call *dst)
}
}
static void
dd_copy_call(struct dd_call *dst, struct dd_call *src)
{
dst->type = src->type;
switch (src->type) {
case CALL_DRAW_VBO:
- pipe_so_target_reference(&dst->info.draw_vbo.count_from_stream_output,
- src->info.draw_vbo.count_from_stream_output);
- pipe_resource_reference(&dst->info.draw_vbo.indirect,
- src->info.draw_vbo.indirect);
- pipe_resource_reference(&dst->info.draw_vbo.indirect_params,
- src->info.draw_vbo.indirect_params);
+ pipe_so_target_reference(&dst->info.draw_vbo.draw.count_from_stream_output,
+ src->info.draw_vbo.draw.count_from_stream_output);
+ pipe_resource_reference(&dst->info.draw_vbo.indirect.buffer,
+ src->info.draw_vbo.indirect.buffer);
+ pipe_resource_reference(&dst->info.draw_vbo.indirect.indirect_draw_count,
+ src->info.draw_vbo.indirect.indirect_draw_count);
dst->info.draw_vbo = src->info.draw_vbo;
+ if (!src->info.draw_vbo.draw.indirect)
+ dst->info.draw_vbo.draw.indirect = NULL;
+ else
+ dst->info.draw_vbo.draw.indirect = &dst->info.draw_vbo.indirect;
break;
case CALL_LAUNCH_GRID:
pipe_resource_reference(&dst->info.launch_grid.indirect,
src->info.launch_grid.indirect);
dst->info.launch_grid = src->info.launch_grid;
break;
case CALL_RESOURCE_COPY_REGION:
pipe_resource_reference(&dst->info.resource_copy_region.dst,
src->info.resource_copy_region.dst);
pipe_resource_reference(&dst->info.resource_copy_region.src,
@@ -1154,21 +1160,27 @@ dd_after_draw(struct dd_context *dctx, struct dd_call *call)
static void
dd_context_draw_vbo(struct pipe_context *_pipe,
const struct pipe_draw_info *info)
{
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
struct dd_call call;
call.type = CALL_DRAW_VBO;
- call.info.draw_vbo = *info;
+ call.info.draw_vbo.draw = *info;
+ if (info->indirect) {
+ call.info.draw_vbo.indirect = *info->indirect;
+ call.info.draw_vbo.draw.indirect = &call.info.draw_vbo.indirect;
+ } else {
+ memset(&call.info.draw_vbo.indirect, 0, sizeof(*info->indirect));
+ }
dd_before_draw(dctx);
pipe->draw_vbo(pipe, info);
dd_after_draw(dctx, &call);
}
static void
dd_context_launch_grid(struct pipe_context *_pipe,
const struct pipe_grid_info *info)
{
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h
index deae1f5..ea33193 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -97,26 +97,31 @@ struct call_clear_buffer
struct call_generate_mipmap {
struct pipe_resource *res;
enum pipe_format format;
unsigned base_level;
unsigned last_level;
unsigned first_layer;
unsigned last_layer;
};
+struct call_draw_info {
+ struct pipe_draw_info draw;
+ struct pipe_draw_indirect_info indirect;
+};
+
struct dd_call
{
enum call_type type;
union {
- struct pipe_draw_info draw_vbo;
+ struct call_draw_info draw_vbo;
struct pipe_grid_info launch_grid;
struct call_resource_copy_region resource_copy_region;
struct pipe_blit_info blit;
struct pipe_resource *flush_resource;
struct call_clear clear;
struct call_clear_buffer clear_buffer;
struct call_generate_mipmap generate_mipmap;
} info;
};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index b42b468..7cea5fb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -811,24 +811,24 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
}
static void
nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- struct nv04_resource *buf = nv04_resource(info->indirect);
- struct nv04_resource *buf_count = nv04_resource(info->indirect_params);
- unsigned size, macro, count = info->indirect_count, drawid = info->drawid;
- uint32_t offset = buf->offset + info->indirect_offset;
+ struct nv04_resource *buf = nv04_resource(info->indirect->buffer);
+ struct nv04_resource *buf_count = nv04_resource(info->indirect->indirect_draw_count);
+ unsigned size, macro, count = info->indirect->draw_count, drawid = info->drawid;
+ uint32_t offset = buf->offset + info->indirect->offset;
struct nvc0_screen *screen = nvc0->screen;
PUSH_SPACE(push, 7);
/* must make FIFO wait for engines idle before continuing to process */
if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) ||
(buf_count && buf_count->fence_wr &&
!nouveau_fence_signalled(buf_count->fence_wr))) {
IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0);
}
@@ -863,54 +863,54 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT;
}
/* If the stride is not the natural stride, we have to stick a separate
* push data reference for each draw. Otherwise it can all go in as one.
* Of course there is a maximum packet size, so we have to break things up
* along those borders as well.
*/
while (count) {
unsigned draws = count, pushes, i;
- if (info->indirect_stride == size * 4) {
+ if (info->indirect->stride == size * 4) {
draws = MIN2(draws, (NV04_PFIFO_MAX_PACKET_LEN - 4) / size);
pushes = 1;
} else {
draws = MIN2(draws, 32);
pushes = draws;
}
nouveau_pushbuf_space(push, 16, 0, pushes + !!buf_count);
PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
if (buf_count)
PUSH_REFN(push, buf_count->bo, NOUVEAU_BO_RD | buf_count->domain);
PUSH_DATA(push,
NVC0_FIFO_PKHDR_1I(0, macro, 3 + !!buf_count + draws * size));
PUSH_DATA(push, nvc0_prim_gl(info->mode));
PUSH_DATA(push, drawid);
PUSH_DATA(push, draws);
if (buf_count) {
nouveau_pushbuf_data(push,
buf_count->bo,
- buf_count->offset + info->indirect_params_offset,
+ buf_count->offset + info->indirect->indirect_draw_count_offset,
NVC0_IB_ENTRY_1_NO_PREFETCH | 4);
}
if (pushes == 1) {
nouveau_pushbuf_data(push,
buf->bo, offset,
NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4 * draws));
- offset += draws * info->indirect_stride;
+ offset += draws * info->indirect->stride;
} else {
for (i = 0; i < pushes; i++) {
nouveau_pushbuf_data(push,
buf->bo, offset,
NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4));
- offset += info->indirect_stride;
+ offset += info->indirect->stride;
}
}
count -= draws;
drawid += draws;
}
}
static inline void
nvc0_update_prim_restart(struct nvc0_context *nvc0, bool en, uint32_t index)
{
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index f3011c8..ee6fd26 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1763,25 +1763,25 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
unsigned out_offset;
void *ptr;
unsigned start, count;
if (likely(!info->indirect)) {
start = 0;
count = info->count;
}
else {
/* Have to get start/count from indirect buffer, slow path ahead... */
- struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect;
+ struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect->buffer;
unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource,
PIPE_TRANSFER_READ);
if (data) {
- data += info->indirect_offset / sizeof(unsigned);
+ data += info->indirect->offset / sizeof(unsigned);
start = data[2] * ib.index_size;
count = data[0];
}
else {
start = 0;
count = 0;
}
}
u_upload_alloc(ctx->stream_uploader, start, count * 2,
@@ -1911,35 +1911,35 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
r600_conv_pipe_prim(info->mode));
rctx->last_primitive_type = info->mode;
}
/* Draw packets. */
if (likely(!info->indirect)) {
radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
radeon_emit(cs, info->instance_count);
} else {
- uint64_t va = r600_resource(info->indirect)->gpu_address;
+ uint64_t va = r600_resource(info->indirect->buffer)->gpu_address;
assert(rctx->b.chip_class >= EVERGREEN);
// Invalidate so non-indirect draw calls reset this state
rctx->vgt_state.last_draw_was_indirect = true;
rctx->last_start_instance = -1;
radeon_emit(cs, PKT3(EG_PKT3_SET_BASE, 2, 0));
radeon_emit(cs, EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE);
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
- (struct r600_resource*)info->indirect,
+ (struct r600_resource*)info->indirect->buffer,
RADEON_USAGE_READ,
RADEON_PRIO_DRAW_INDIRECT));
}
if (info->indexed) {
radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
radeon_emit(cs, ib.index_size == 4 ?
(VGT_INDEX_32 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_32_BIT : 0)) :
(VGT_INDEX_16 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_16_BIT : 0)));
@@ -1975,21 +1975,21 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
(struct r600_resource*)ib.buffer,
RADEON_USAGE_READ,
RADEON_PRIO_INDEX_BUFFER));
radeon_emit(cs, PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, 0));
radeon_emit(cs, max_size);
radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
+ radeon_emit(cs, info->indirect->offset);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA);
}
}
} else {
if (unlikely(info->count_from_stream_output)) {
struct r600_so_target *t = (struct r600_so_target*)info->count_from_stream_output;
uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset;
radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
@@ -2005,21 +2005,21 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
t->buf_filled_size, RADEON_USAGE_READ,
RADEON_PRIO_SO_FILLED_SIZE));
}
if (likely(!info->indirect)) {
radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
radeon_emit(cs, info->count);
}
else {
radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
+ radeon_emit(cs, info->indirect->offset);
}
radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
(info->count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0));
}
/* SMX returns CONTEXT_DONE too early workaround */
if (rctx->b.family == CHIP_R600 ||
rctx->b.family == CHIP_RV610 ||
rctx->b.family == CHIP_RV630 ||
rctx->b.family == CHIP_RV635) {
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e6a9ee0..70b6ed8 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -603,20 +603,21 @@ static void si_emit_draw_registers(struct si_context *sctx,
radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
info->restart_index);
sctx->last_restart_index = info->restart_index;
}
}
static void si_emit_draw_packets(struct si_context *sctx,
const struct pipe_draw_info *info,
const struct pipe_index_buffer *ib)
{
+ struct pipe_draw_indirect_info *indirect = info->indirect;
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
uint32_t index_max_size = 0;
uint64_t index_va = 0;
if (info->count_from_stream_output) {
struct r600_so_target *t =
(struct r600_so_target*)info->count_from_stream_output;
uint64_t va = t->buf_filled_size->gpu_address +
@@ -683,85 +684,85 @@ static void si_emit_draw_packets(struct si_context *sctx,
(struct r600_resource *)ib->buffer,
RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
} else {
/* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE,
* so the state must be re-emitted before the next indexed draw.
*/
if (sctx->b.chip_class >= CIK)
sctx->last_index_size = -1;
}
- if (info->indirect) {
- uint64_t indirect_va = r600_resource(info->indirect)->gpu_address;
+ if (indirect) {
+ uint64_t indirect_va = r600_resource(indirect->buffer)->gpu_address;
assert(indirect_va % 8 == 0);
si_invalidate_draw_sh_constants(sctx);
radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
radeon_emit(cs, 1);
radeon_emit(cs, indirect_va);
radeon_emit(cs, indirect_va >> 32);
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- (struct r600_resource *)info->indirect,
+ (struct r600_resource *)indirect->buffer,
RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
unsigned di_src_sel = info->indexed ? V_0287F0_DI_SRC_SEL_DMA
: V_0287F0_DI_SRC_SEL_AUTO_INDEX;
- assert(info->indirect_offset % 4 == 0);
+ assert(indirect->offset % 4 == 0);
if (info->indexed) {
radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
radeon_emit(cs, index_va);
radeon_emit(cs, index_va >> 32);
radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
radeon_emit(cs, index_max_size);
}
if (!sctx->screen->has_draw_indirect_multi) {
radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT
: PKT3_DRAW_INDIRECT,
3, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
+ radeon_emit(cs, indirect->offset);
radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, di_src_sel);
} else {
uint64_t count_va = 0;
- if (info->indirect_params) {
+ if (indirect->indirect_draw_count) {
struct r600_resource *params_buf =
- (struct r600_resource *)info->indirect_params;
+ (struct r600_resource *)indirect->indirect_draw_count;
radeon_add_to_buffer_list(
&sctx->b, &sctx->b.gfx, params_buf,
RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
- count_va = params_buf->gpu_address + info->indirect_params_offset;
+ count_va = params_buf->gpu_address + indirect->indirect_draw_count_offset;
}
radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
PKT3_DRAW_INDIRECT_MULTI,
8, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
+ radeon_emit(cs, indirect->offset);
radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, ((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) |
S_2C3_DRAW_INDEX_ENABLE(1) |
- S_2C3_COUNT_INDIRECT_ENABLE(!!info->indirect_params));
- radeon_emit(cs, info->indirect_count);
+ S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
+ radeon_emit(cs, indirect->draw_count);
radeon_emit(cs, count_va);
radeon_emit(cs, count_va >> 32);
- radeon_emit(cs, info->indirect_stride);
+ radeon_emit(cs, indirect->stride);
radeon_emit(cs, di_src_sel);
}
} else {
int base_vertex;
radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
radeon_emit(cs, info->instance_count);
/* Base vertex and start instance. */
base_vertex = info->indexed ? info->index_bias : info->start;
@@ -1048,64 +1049,66 @@ void si_emit_cache_flush(struct si_context *sctx)
EVENT_INDEX(0));
}
rctx->flags = 0;
}
static void si_get_draw_start_count(struct si_context *sctx,
const struct pipe_draw_info *info,
unsigned *start, unsigned *count)
{
- if (info->indirect) {
+ struct pipe_draw_indirect_info *indirect = info->indirect;
+
+ if (indirect) {
unsigned indirect_count;
struct pipe_transfer *transfer;
unsigned begin, end;
unsigned map_size;
unsigned *data;
- if (info->indirect_params) {
+ if (indirect->indirect_draw_count) {
data = pipe_buffer_map_range(&sctx->b.b,
- info->indirect_params,
- info->indirect_params_offset,
+ indirect->indirect_draw_count,
+ indirect->indirect_draw_count_offset,
sizeof(unsigned),
PIPE_TRANSFER_READ, &transfer);
indirect_count = *data;
pipe_buffer_unmap(&sctx->b.b, transfer);
} else {
- indirect_count = info->indirect_count;
+ indirect_count = indirect->draw_count;
}
if (!indirect_count) {
*start = *count = 0;
return;
}
- map_size = (indirect_count - 1) * info->indirect_stride + 3 * sizeof(unsigned);
- data = pipe_buffer_map_range(&sctx->b.b, info->indirect,
- info->indirect_offset, map_size,
+ map_size = (indirect_count - 1) * indirect->stride + 3 * sizeof(unsigned);
+ data = pipe_buffer_map_range(&sctx->b.b, indirect->buffer,
+ indirect->offset, map_size,
PIPE_TRANSFER_READ, &transfer);
begin = UINT_MAX;
end = 0;
for (unsigned i = 0; i < indirect_count; ++i) {
unsigned count = data[0];
unsigned start = data[2];
if (count > 0) {
begin = MIN2(begin, start);
end = MAX2(end, start + count);
}
- data += info->indirect_stride / sizeof(unsigned);
+ data += indirect->stride / sizeof(unsigned);
}
pipe_buffer_unmap(&sctx->b.b, transfer);
if (begin < end) {
*start = begin;
*count = end - begin;
} else {
*start = *count = 0;
}
@@ -1277,32 +1280,34 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
} else if (sctx->b.chip_class <= CIK &&
r600_resource(ib->buffer)->TC_L2_dirty) {
/* VI reads index buffers through TC L2, so it doesn't
* need this. */
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
r600_resource(ib->buffer)->TC_L2_dirty = false;
}
}
if (info->indirect) {
+ struct pipe_draw_indirect_info *indirect = info->indirect;
+
/* Add the buffer size for memory checking in need_cs_space. */
- r600_context_add_resource_size(ctx, info->indirect);
+ r600_context_add_resource_size(ctx, indirect->buffer);
- if (r600_resource(info->indirect)->TC_L2_dirty) {
+ if (r600_resource(indirect->buffer)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
- r600_resource(info->indirect)->TC_L2_dirty = false;
+ r600_resource(indirect->buffer)->TC_L2_dirty = false;
}
- if (info->indirect_params &&
- r600_resource(info->indirect_params)->TC_L2_dirty) {
+ if (indirect->indirect_draw_count &&
+ r600_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
- r600_resource(info->indirect_params)->TC_L2_dirty = false;
+ r600_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
}
}
si_need_cs_space(sctx);
/* Since we've called r600_context_add_resource_size for vertex buffers,
* this must be called after si_need_cs_space, because we must let
* need_cs_space flush before we add buffers to the buffer list.
*/
if (!si_upload_vertex_buffer_descriptors(sctx))
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 13c0a9d..e4a5e3b 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -805,22 +805,30 @@ void trace_dump_draw_info(const struct pipe_draw_info *state)
trace_dump_member(int, state, index_bias);
trace_dump_member(uint, state, min_index);
trace_dump_member(uint, state, max_index);
trace_dump_member(bool, state, primitive_restart);
trace_dump_member(uint, state, restart_index);
trace_dump_member(ptr, state, count_from_stream_output);
- trace_dump_member(ptr, state, indirect);
- trace_dump_member(uint, state, indirect_offset);
+ if (!state->indirect) {
+ trace_dump_member(ptr, state, indirect);
+ } else {
+ trace_dump_member(uint, state, indirect->offset);
+ trace_dump_member(uint, state, indirect->stride);
+ trace_dump_member(uint, state, indirect->draw_count);
+ trace_dump_member(uint, state, indirect->indirect_draw_count_offset);
+ trace_dump_member(ptr, state, indirect->buffer);
+ trace_dump_member(ptr, state, indirect->indirect_draw_count);
+ }
trace_dump_struct_end();
}
void trace_dump_blit_info(const struct pipe_blit_info *info)
{
char mask[7];
if (!trace_dumping_enabled_locked())
return;
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 0c9b4b4..3cfdd34 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -634,20 +634,54 @@ struct pipe_vertex_element
*/
struct pipe_index_buffer
{
unsigned index_size; /**< size of an index, in bytes */
unsigned offset; /**< offset to start of data in buffer, in bytes */
struct pipe_resource *buffer; /**< the actual buffer */
const void *user_buffer; /**< pointer to a user buffer if buffer == NULL */
};
+struct pipe_draw_indirect_info
+{
+ unsigned offset; /**< must be 4 byte aligned */
+ unsigned stride; /**< must be 4 byte aligned */
+ unsigned draw_count; /**< number of indirect draws */
+ unsigned indirect_draw_count_offset; /**< must be 4 byte aligned */
+
+ /* Indirect draw parameters resource is laid out as follows:
+ *
+ * if indexed is TRUE:
+ * struct {
+ * uint32_t count;
+ * uint32_t instance_count;
+ * uint32_t start;
+ * int32_t index_bias;
+ * uint32_t start_instance;
+ * };
+ * otherwise:
+ * struct {
+ * uint32_t count;
+ * uint32_t instance_count;
+ * uint32_t start;
+ * uint32_t start_instance;
+ * };
+ */
+ struct pipe_resource *buffer;
+
+ /* Indirect draw count resource: If not NULL, contains a 32-bit value which
+ * is to be used as the real draw_count.
+ */
+ struct pipe_resource *indirect_draw_count;
+};
+
+
/**
* Information to describe a draw_vbo call.
*/
struct pipe_draw_info
{
boolean indexed; /**< use index buffer */
enum pipe_prim_type mode:8; /**< the mode of the primitive */
boolean primitive_restart;
ubyte vertices_per_patch; /**< the number of vertices per patch */
@@ -664,54 +698,23 @@ struct pipe_draw_info
*/
int index_bias; /**< a bias to be added to each index */
unsigned min_index; /**< the min index */
unsigned max_index; /**< the max index */
/**
* Primitive restart enable/index (only applies to indexed drawing)
*/
unsigned restart_index;
- unsigned indirect_offset; /**< must be 4 byte aligned */
- unsigned indirect_stride; /**< must be 4 byte aligned */
- unsigned indirect_count; /**< number of indirect draws */
-
- unsigned indirect_params_offset; /**< must be 4 byte aligned */
-
/* Pointers must be at the end for an optimal structure layout on 64-bit. */
- /* Indirect draw parameters resource: If not NULL, most values are taken
- * from this buffer instead, which is laid out as follows:
- *
- * if indexed is TRUE:
- * struct {
- * uint32_t count;
- * uint32_t instance_count;
- * uint32_t start;
- * int32_t index_bias;
- * uint32_t start_instance;
- * };
- * otherwise:
- * struct {
- * uint32_t count;
- * uint32_t instance_count;
- * uint32_t start;
- * uint32_t start_instance;
- * };
- */
- struct pipe_resource *indirect;
-
- /* Indirect draw count resource: If not NULL, contains a 32-bit value which
- * is to be used as the real indirect_count. In that case indirect_count
- * becomes the maximum possible value.
- */
- struct pipe_resource *indirect_params;
+ struct pipe_draw_indirect_info *indirect; /**< Indirect draw. */
/**
* Stream output target. If not NULL, it's used to provide the 'count'
* parameter based on the number vertices captured by the stream output
* stage. (or generally, based on the number of bytes captured)
*
* Only 'mode', 'start_instance', and 'instance_count' are taken into
* account, all the other variables from pipe_draw_info are ignored.
*
* 'start' is implicitly 0 and 'count' is set as discussed above.
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 6f97ddd..6390735 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -3024,21 +3024,20 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
return D3DERR_DRIVERINTERNALERROR;
}
draw.mode = PIPE_PRIM_POINTS;
draw.count = VertexCount;
draw.start_instance = 0;
draw.primitive_restart = FALSE;
draw.restart_index = 0;
draw.count_from_stream_output = NULL;
draw.indirect = NULL;
- draw.indirect_params = NULL;
draw.instance_count = 1;
draw.indexed = FALSE;
draw.start = 0;
draw.index_bias = 0;
draw.min_index = 0;
draw.max_index = VertexCount - 1;
pipe_sw->set_stream_output_targets(pipe_sw, 1, &target, offsets);
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 2046d9d..3b1cd7c 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -2552,21 +2552,20 @@ init_draw_info(struct pipe_draw_info *info,
info->mode = d3dprimitivetype_to_pipe_prim(type);
info->count = prim_count_to_vertex_count(type, count);
info->start_instance = 0;
info->instance_count = 1;
if (dev->context.stream_instancedata_mask & dev->context.stream_usage_mask)
info->instance_count = MAX2(dev->context.stream_freq[0] & 0x7FFFFF, 1);
info->primitive_restart = FALSE;
info->restart_index = 0;
info->count_from_stream_output = NULL;
info->indirect = NULL;
- info->indirect_params = NULL;
}
CSMT_ITEM_NO_WAIT(nine_context_draw_primitive,
ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
ARG_VAL(UINT, StartVertex),
ARG_VAL(UINT, PrimitiveCount))
{
struct nine_context *context = &device->context;
struct pipe_draw_info info;
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 5c9f7ea..29381b6 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -257,76 +257,79 @@ st_indirect_draw_vbo(struct gl_context *ctx,
struct gl_buffer_object *indirect_data,
GLsizeiptr indirect_offset,
unsigned draw_count,
unsigned stride,
struct gl_buffer_object *indirect_params,
GLsizeiptr indirect_params_offset,
const struct _mesa_index_buffer *ib)
{
struct st_context *st = st_context(ctx);
struct pipe_draw_info info;
+ struct pipe_draw_indirect_info indirect;
/* Mesa core state should have been validated already */
assert(ctx->NewState == 0x0);
assert(stride);
st_invalidate_readpix_cache(st);
/* Validate state. */
if ((st->dirty | ctx->NewDriverState) & ST_PIPELINE_RENDER_STATE_MASK ||
st->gfx_shaders_may_be_dirty) {
st_validate_state(st, ST_PIPELINE_RENDER);
}
if (st->vertex_array_out_of_memory) {
return;
}
+ memset(&indirect, 0, sizeof(indirect));
util_draw_init_info(&info);
if (ib) {
setup_index_buffer(st, ib);
info.indexed = TRUE;
/* Primitive restart is not handled by the VBO module in this case. */
setup_primitive_restart(ctx, &info, ib->index_size);
}
info.mode = translate_prim(ctx, mode);
info.vertices_per_patch = ctx->TessCtrlProgram.patch_vertices;
- info.indirect = st_buffer_object(indirect_data)->buffer;
- info.indirect_offset = indirect_offset;
+ info.indirect = &indirect;
+ indirect.buffer = st_buffer_object(indirect_data)->buffer;
+ indirect.offset = indirect_offset;
if (ST_DEBUG & DEBUG_DRAW) {
debug_printf("st/draw indirect: mode %s drawcount %d indexed %d\n",
u_prim_name(info.mode),
draw_count,
info.indexed);
}
if (!st->has_multi_draw_indirect) {
int i;
assert(!indirect_params);
- info.indirect_count = 1;
+ indirect.draw_count = 1;
for (i = 0; i < draw_count; i++) {
info.drawid = i;
cso_draw_vbo(st->cso_context, &info);
- info.indirect_offset += stride;
+ indirect.offset += stride;
}
} else {
- info.indirect_count = draw_count;
- info.indirect_stride = stride;
+ indirect.draw_count = draw_count;
+ indirect.stride = stride;
if (indirect_params) {
- info.indirect_params = st_buffer_object(indirect_params)->buffer;
- info.indirect_params_offset = indirect_params_offset;
+ indirect.indirect_draw_count = st_buffer_object(indirect_params)->buffer;
+ indirect.indirect_draw_count_offset = indirect_params_offset;
}
cso_draw_vbo(st->cso_context, &info);
}
}
void
st_init_draw(struct st_context *st)
{
struct gl_context *ctx = st->ctx;
--
2.7.4
More information about the mesa-dev mailing list