[Mesa-dev] [PATCH 04/13] r600g: do fine-grained vertex buffer updates
Marek Olšák
maraeo at gmail.com
Sat Jul 14 19:35:35 PDT 2012
If only some buffers are changed, the other ones don't have to be re-emitted.
This uses bitmasks of enabled and dirty buffers just like
emit_constant_buffers does.
---
src/gallium/drivers/r600/evergreen_compute.c | 12 +++---
src/gallium/drivers/r600/evergreen_state.c | 23 +++--------
src/gallium/drivers/r600/r600_blit.c | 4 +-
src/gallium/drivers/r600/r600_buffer.c | 15 ++++---
src/gallium/drivers/r600/r600_hw_context.c | 5 ++-
src/gallium/drivers/r600/r600_pipe.h | 12 +++---
src/gallium/drivers/r600/r600_state.c | 23 ++++++-----
src/gallium/drivers/r600/r600_state_common.c | 56 +++++++++++++++++++++-----
8 files changed, 87 insertions(+), 63 deletions(-)
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 947a328..caaa752 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -89,14 +89,15 @@ static void evergreen_cs_set_vertex_buffer(
unsigned offset,
struct pipe_resource * buffer)
{
- struct pipe_vertex_buffer *vb = &rctx->cs_vertex_buffer[vb_index];
- struct r600_vertexbuf_state * state = &rctx->cs_vertex_buffer_state;
+ struct r600_vertexbuf_state *state = &rctx->cs_vertex_buffer_state;
+ struct pipe_vertex_buffer *vb = &state->vb[vb_index];
vb->stride = 1;
vb->buffer_offset = offset;
vb->buffer = buffer;
vb->user_buffer = NULL;
r600_inval_vertex_cache(rctx);
+ state->enabled_mask |= 1 << vb_index;
state->dirty_mask |= 1 << vb_index;
r600_atom_dirty(rctx, &state->atom);
}
@@ -369,7 +370,7 @@ static void compute_emit_cs(struct r600_context *ctx)
r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE);
/* Emit vertex buffer state */
- ctx->cs_vertex_buffer_state.atom.num_dw = 12 * ctx->nr_cs_vertex_buffers;
+ ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);
for (i = 0; i < get_compute_resource_num(); i++) {
@@ -493,10 +494,8 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
evergreen_cs_set_vertex_buffer(ctx, vtx_id,
buffer->chunk->start_in_dw * 4,
resources[i]->base.texture);
- ctx->nr_cs_vertex_buffers = vtx_id + 1;
}
}
-
}
static void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
@@ -740,7 +739,8 @@ void evergreen_init_compute_state_functions(struct r600_context *ctx)
/* We always use at least two vertex buffers for compute, one for
* parameters and one for global memory */
- ctx->nr_cs_vertex_buffers = 2;
+ ctx->cs_vertex_buffer_state.enabled_mask =
+ ctx->cs_vertex_buffer_state.dirty_mask = 1 | 2;
}
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 0d2fa30..72ddc0b 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1772,8 +1772,6 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
struct r600_vertexbuf_state *state,
- struct pipe_vertex_buffer *vertex_buffers,
- unsigned vb_count,
unsigned resource_offset,
unsigned pkt_flags)
{
@@ -1784,13 +1782,11 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
struct pipe_vertex_buffer *vb;
struct r600_resource *rbuffer;
uint64_t va;
- unsigned buffer_index = ffs(dirty_mask) - 1;
+ unsigned buffer_index = u_bit_scan(&dirty_mask);
- vb = &vertex_buffers[buffer_index];
+ vb = &state->vb[buffer_index];
rbuffer = (struct r600_resource*)vb->buffer;
- if (!rbuffer) {
- goto next;
- }
+ assert(rbuffer);
va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b);
va += vb->buffer_offset;
@@ -1816,26 +1812,19 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
-
-next:
- dirty_mask &= ~(1 << buffer_index);
}
state->dirty_mask = 0;
}
static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
{
- evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state,
- rctx->vertex_buffer,
- rctx->nr_vertex_buffers, 992, 0);
+ evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, 992, 0);
}
static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
{
- evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state,
- rctx->cs_vertex_buffer,
- rctx->nr_cs_vertex_buffers, 816,
- RADEON_CP_PACKET3_COMPUTE_MODE);
+ evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, 816,
+ RADEON_CP_PACKET3_COMPUTE_MODE);
}
static void evergreen_emit_constant_buffers(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 98f8b84..ca5aaf8 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -60,8 +60,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
util_blitter_save_viewport(rctx->blitter, &rctx->viewport);
}
util_blitter_save_vertex_buffers(rctx->blitter,
- rctx->nr_vertex_buffers,
- rctx->vertex_buffer);
+ util_last_bit(rctx->vertex_buffer_state.enabled_mask),
+ rctx->vertex_buffer_state.vb);
util_blitter_save_so_targets(rctx->blitter, rctx->num_so_targets,
(struct pipe_stream_output_target**)rctx->so_targets);
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 8e2deb1..165427e 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -93,7 +93,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
/* Check if mapping this buffer would cause waiting for the GPU. */
if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
- unsigned i;
+ unsigned i, mask;
/* Discard the buffer. */
pb_reference(&rbuffer->buf, NULL);
@@ -105,13 +105,12 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
/* We changed the buffer, now we need to bind it where the old one was bound. */
/* Vertex buffers. */
- for (i = 0; i < rctx->nr_vertex_buffers; i++) {
- if (rctx->vertex_buffer[i].buffer == &rbuffer->b.b) {
- struct r600_vertexbuf_state * state =
- &rctx->vertex_buffer_state;
- state->dirty_mask |= 1 << i;
- r600_inval_vertex_cache(rctx);
- r600_atom_dirty(rctx, &state->atom);
+ mask = rctx->vertex_buffer_state.enabled_mask;
+ while (mask) {
+ i = u_bit_scan(&mask);
+ if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
+ rctx->vertex_buffer_state.dirty_mask |= 1 << i;
+ r600_vertex_buffers_dirty(rctx);
}
}
/* Streamout buffers. */
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 2951b86..d0a5918 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -1274,14 +1274,15 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
/* Invalidate caches. */
- r600_inval_vertex_cache(ctx);
r600_inval_texture_cache(ctx);
r600_flush_framebuffer(ctx, false);
/* Re-emit states. */
r600_atom_dirty(ctx, &ctx->cb_misc_state.atom);
r600_atom_dirty(ctx, &ctx->db_misc_state.atom);
- r600_atom_dirty(ctx, &ctx->vertex_buffer_state.atom);
+
+ ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
+ r600_vertex_buffers_dirty(ctx);
ctx->vs_constbuf_state.dirty_mask = ctx->vs_constbuf_state.enabled_mask;
ctx->ps_constbuf_state.dirty_mask = ctx->ps_constbuf_state.enabled_mask;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 6449a4d..200f0a2 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -278,6 +278,8 @@ struct r600_constbuf_state
struct r600_vertexbuf_state
{
struct r600_atom atom;
+ struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+ uint32_t enabled_mask; /* non-NULL buffers */
uint32_t dirty_mask;
};
@@ -399,13 +401,8 @@ struct r600_context {
boolean dual_src_blend;
- /* Vertex and index buffers. */
- bool vertex_buffers_dirty;
+ /* Index buffer. */
struct pipe_index_buffer index_buffer;
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- unsigned nr_vertex_buffers;
- struct pipe_vertex_buffer cs_vertex_buffer[PIPE_MAX_ATTRIBS];
- unsigned nr_cs_vertex_buffers;
};
static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
@@ -528,8 +525,9 @@ unsigned r600_get_cb_flush_flags(struct r600_context *rctx);
void r600_texture_barrier(struct pipe_context *ctx);
void r600_set_index_buffer(struct pipe_context *ctx,
const struct pipe_index_buffer *ib);
+void r600_vertex_buffers_dirty(struct r600_context *rctx);
void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
- const struct pipe_vertex_buffer *buffers);
+ const struct pipe_vertex_buffer *input);
void *r600_create_vertex_elements(struct pipe_context *ctx,
unsigned count,
const struct pipe_vertex_element *elements);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 3d5835c..4f475b3 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1748,27 +1748,28 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->cs;
- struct pipe_vertex_buffer *vb = rctx->vertex_buffer;
- unsigned count = rctx->nr_vertex_buffers;
- unsigned i, offset;
+ uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask;
- for (i = 0; i < count; i++) {
- struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer;
+ while (dirty_mask) {
+ struct pipe_vertex_buffer *vb;
+ struct r600_resource *rbuffer;
+ unsigned offset;
+ unsigned buffer_index = u_bit_scan(&dirty_mask);
- if (!rbuffer) {
- continue;
- }
+ vb = &rctx->vertex_buffer_state.vb[buffer_index];
+ rbuffer = (struct r600_resource*)vb->buffer;
+ assert(rbuffer);
- offset = vb[i].buffer_offset;
+ offset = vb->buffer_offset;
/* fetch resources start at index 320 */
r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
- r600_write_value(cs, (320 + i) * 7);
+ r600_write_value(cs, (320 + buffer_index) * 7);
r600_write_value(cs, offset); /* RESOURCEi_WORD0 */
r600_write_value(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
r600_write_value(cs, /* RESOURCEi_WORD2 */
S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
- S_038008_STRIDE(vb[i].stride));
+ S_038008_STRIDE(vb->stride));
r600_write_value(cs, 0); /* RESOURCEi_WORD3 */
r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index be3d101..4fa2699 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -403,22 +403,58 @@ void r600_set_index_buffer(struct pipe_context *ctx,
}
}
+void r600_vertex_buffers_dirty(struct r600_context *rctx)
+{
+ if (rctx->vertex_buffer_state.dirty_mask) {
+ r600_inval_vertex_cache(rctx);
+ rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
+ util_bitcount(rctx->vertex_buffer_state.dirty_mask);
+ r600_atom_dirty(rctx, &rctx->vertex_buffer_state.atom);
+ }
+}
+
void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
- const struct pipe_vertex_buffer *buffers)
+ const struct pipe_vertex_buffer *input)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- struct r600_vertexbuf_state * state = &rctx->vertex_buffer_state;
+ struct r600_vertexbuf_state *state = &rctx->vertex_buffer_state;
+ struct pipe_vertex_buffer *vb = state->vb;
unsigned i;
+ /* This sets 1-bit for buffers with index >= count. */
+ uint32_t disable_mask = ~((1ull << count) - 1);
+ /* These are the new buffers set by this function. */
+ uint32_t new_buffer_mask = 0;
+
+ /* Set buffers with index >= count to NULL. */
+ uint32_t remaining_buffers_mask =
+ rctx->vertex_buffer_state.enabled_mask & disable_mask;
+
+ while (remaining_buffers_mask) {
+ i = u_bit_scan(&remaining_buffers_mask);
+ pipe_resource_reference(&vb[i].buffer, NULL);
+ }
- util_copy_vertex_buffers(rctx->vertex_buffer, &rctx->nr_vertex_buffers, buffers, count);
+ /* Set vertex buffers. */
+ for (i = 0; i < count; i++) {
+ if (memcmp(&input[i], &vb[i], sizeof(struct pipe_vertex_buffer))) {
+ if (input[i].buffer) {
+ vb[i].stride = input[i].stride;
+ vb[i].buffer_offset = input[i].buffer_offset;
+ pipe_resource_reference(&vb[i].buffer, input[i].buffer);
+ new_buffer_mask |= 1 << i;
+ } else {
+ pipe_resource_reference(&vb[i].buffer, NULL);
+ disable_mask |= 1 << i;
+ }
+ }
+ }
- r600_inval_vertex_cache(rctx);
- state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 10) *
- rctx->nr_vertex_buffers;
- for (i = 0 ; i < rctx->nr_vertex_buffers; i++) {
- state->dirty_mask |= 1 << i;
- }
- r600_atom_dirty(rctx, &state->atom);
+ rctx->vertex_buffer_state.enabled_mask &= ~disable_mask;
+ rctx->vertex_buffer_state.dirty_mask &= rctx->vertex_buffer_state.enabled_mask;
+ rctx->vertex_buffer_state.enabled_mask |= new_buffer_mask;
+ rctx->vertex_buffer_state.dirty_mask |= new_buffer_mask;
+
+ r600_vertex_buffers_dirty(rctx);
}
void *r600_create_vertex_elements(struct pipe_context *ctx,
--
1.7.9.5
More information about the mesa-dev
mailing list