[Mesa-dev] [PATCH 11/11] radeonsi: determine in advance which VBOs should be added to the buffer list
Marek Olšák
maraeo at gmail.com
Tue Jan 17 22:48:01 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_descriptors.c | 8 ++++----
src/gallium/drivers/radeonsi/si_state.c | 6 ++++++
src/gallium/drivers/radeonsi/si_state.h | 1 +
3 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 837f393..057f374 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -932,29 +932,29 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
return;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_DESCRIPTORS);
}
bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
struct si_vertex_element *velems = sctx->vertex_elements;
struct si_descriptors *desc = &sctx->vertex_buffers;
- bool bound[SI_NUM_VERTEX_BUFFERS] = {};
unsigned i, count = velems->count;
uint64_t va;
uint32_t *ptr;
if (!sctx->vertex_buffers_dirty || !count || !velems)
return true;
unsigned fix_size3 = velems->fix_size3;
+ unsigned first_vb_use_mask = velems->first_vb_use_mask;
/* Vertex buffer descriptors are the only ones which are uploaded
* directly through a staging buffer and don't go through
* the fine-grained upload path.
*/
u_upload_alloc(sctx->b.uploader, 0, count * 16, 256, &desc->buffer_offset,
(struct pipe_resource**)&desc->buffer, (void**)&ptr);
if (!desc->buffer)
return false;
@@ -962,23 +962,24 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_DESCRIPTORS);
assert(count <= SI_NUM_VERTEX_BUFFERS);
for (i = 0; i < count; i++) {
struct pipe_vertex_element *ve = &velems->elements[i];
struct pipe_vertex_buffer *vb;
struct r600_resource *rbuffer;
unsigned offset;
+ unsigned vbo_index = ve->vertex_buffer_index;
uint32_t *desc = &ptr[i*4];
- vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
+ vb = &sctx->vertex_buffer[vbo_index];
rbuffer = (struct r600_resource*)vb->buffer;
if (!rbuffer) {
memset(desc, 0, 16);
continue;
}
offset = vb->buffer_offset + ve->src_offset;
va = rbuffer->gpu_address + offset;
/* Fill in T# buffer resource description */
@@ -1011,25 +1012,24 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
size3 = (fix_size3 >> (2 * i)) & 3;
if (vb->stride && size3) {
assert(offset % 4 == 0 && vb->stride % 4 == 0);
assert(size3 <= 2);
desc[2] = align(desc[2], size3 * 2);
}
}
desc[3] = velems->rsrc_word3[i];
- if (!bound[ve->vertex_buffer_index]) {
+ if (first_vb_use_mask & (1 << vbo_index)) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource*)vb->buffer,
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
- bound[ve->vertex_buffer_index] = true;
}
}
/* Don't flush the const cache. It would have a very negative effect
* on performance (confirmed by testing). New descriptors are always
* uploaded to a fresh new buffer, so I don't think flushing the const
* cache is needed. */
si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
sctx->vertex_buffers_dirty = false;
sctx->vertex_buffer_pointer_dirty = true;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3022260..568e169 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3316,39 +3316,45 @@ static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
/*
* Vertex elements & buffers
*/
static void *si_create_vertex_elements(struct pipe_context *ctx,
unsigned count,
const struct pipe_vertex_element *elements)
{
struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
+ bool used[SI_NUM_VERTEX_BUFFERS] = {};
int i;
assert(count <= SI_MAX_ATTRIBS);
if (!v)
return NULL;
v->count = count;
for (i = 0; i < count; ++i) {
const struct util_format_description *desc;
const struct util_format_channel_description *channel;
unsigned data_format, num_format;
int first_non_void;
unsigned vbo_index = elements[i].vertex_buffer_index;
if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
FREE(v);
return NULL;
}
+ if (!used[vbo_index]) {
+ v->first_vb_use_mask |= 1 << vbo_index;
+ used[vbo_index] = true;
+ }
+
desc = util_format_description(elements[i].src_format);
first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
channel = &desc->channel[first_non_void];
v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 34a0f57..03e5011 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -92,20 +92,21 @@ struct si_state_dsa {
struct si_stencil_ref {
struct r600_atom atom;
struct pipe_stencil_ref state;
struct si_dsa_stencil_ref_part dsa_part;
};
struct si_vertex_element
{
unsigned count;
+ unsigned first_vb_use_mask;
/* Two bits per attribute indicating the size of each vector component
* in bytes if the size 3-workaround must be applied.
*/
uint32_t fix_size3;
uint64_t fix_fetch;
uint32_t rsrc_word3[SI_MAX_ATTRIBS];
uint32_t format_size[SI_MAX_ATTRIBS];
struct pipe_vertex_element elements[SI_MAX_ATTRIBS];
--
2.7.4
More information about the mesa-dev mailing list