[Mesa-dev] [PATCH 2/2] r600g: put user indices into the command stream for small index counts
Marek Olšák
maraeo at gmail.com
Sun Oct 7 16:55:03 PDT 2012
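This slightly improves performance when there are many small indexed draw
commands that use user index buffers: on little-endian machines, non-instanced
draws with at most 20 bytes of indices skip the index buffer upload and emit
the indices directly into the command stream via PKT3_DRAW_INDEX_IMMD.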
---
src/gallium/drivers/r600/r600_state_common.c | 41 +++++++++++++++++---------
src/gallium/drivers/r600/r600d.h | 1 +
2 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index df8a3f5..8b8e34d 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1125,7 +1125,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
unsigned i;
struct r600_block *dirty_block = NULL, *next_block = NULL;
struct radeon_winsys_cs *cs = rctx->cs;
- uint64_t va;
if (!info.count && (info.indexed || !info.count_from_stream_output)) {
assert(0);
@@ -1165,10 +1164,15 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
ib.index_size = 2;
}
- /* Upload the index buffer. */
- if (ib.user_buffer) {
+ /* Upload the index buffer.
+ * The upload is skipped for small index counts on little-endian machines
+ * and the indices are emitted via PKT3_DRAW_INDEX_IMMD.
+ * Note: Instanced rendering in combination with immediate indices hangs. */
+ if (ib.user_buffer && (R600_BIG_ENDIAN || info.instance_count > 1 ||
+ info.count*ib.index_size > 20)) {
u_upload_data(rctx->uploader, 0, info.count * ib.index_size,
ib.user_buffer, &ib.offset, &ib.buffer);
+ ib.user_buffer = NULL;
}
} else {
info.index_bias = info.start;
@@ -1192,8 +1196,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
rctx->vgt_state.atom.dirty = true;
}
- /* Emit states (the function expects that we emit at most 17 dwords here). */
- r600_need_cs_space(rctx, 0, TRUE);
+ /* Emit states. */
+ r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
r600_flush_emit(rctx);
for (i = 0; i < R600_NUM_ATOMS; i++) {
@@ -1243,15 +1247,24 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
(VGT_INDEX_32 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_32_BIT : 0)) :
(VGT_INDEX_16 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_16_BIT : 0));
- va = r600_resource_va(ctx->screen, ib.buffer);
- va += ib.offset;
- cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->predicate_drawing);
- cs->buf[cs->cdw++] = va;
- cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
- cs->buf[cs->cdw++] = info.count;
- cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->predicate_drawing);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ);
+ if (ib.user_buffer) {
+ unsigned size_bytes = info.count*ib.index_size;
+ unsigned size_dw = align(size_bytes, 4) / 4;
+ cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, rctx->predicate_drawing);
+ cs->buf[cs->cdw++] = info.count;
+ cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_IMMEDIATE;
+ memcpy(cs->buf+cs->cdw, ib.user_buffer, size_bytes);
+ cs->cdw += size_dw;
+ } else {
+ uint64_t va = r600_resource_va(ctx->screen, ib.buffer) + ib.offset;
+ cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->predicate_drawing);
+ cs->buf[cs->cdw++] = va;
+ cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
+ cs->buf[cs->cdw++] = info.count;
+ cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
+ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->predicate_drawing);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ);
+ }
} else {
if (info.count_from_stream_output) {
struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 4b46bd7..8cb5fea 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -988,6 +988,7 @@
#define G_0287F0_SOURCE_SELECT(x) (((x) >> 0) & 0x3)
#define C_0287F0_SOURCE_SELECT 0xFFFFFFFC
#define V_0287F0_DI_SRC_SEL_DMA 0
+#define V_0287F0_DI_SRC_SEL_IMMEDIATE 1
#define V_0287F0_DI_SRC_SEL_AUTO_INDEX 2
#define S_0287F0_MAJOR_MODE(x) (((x) & 0x3) << 2)
#define G_0287F0_MAJOR_MODE(x) (((x) >> 2) & 0x3)
--
1.7.9.5
More information about the mesa-dev
mailing list