[Mesa-dev] [PATCH 3/6] radeonsi: align vertex buffer descriptor list size for optimal prefetch

Marek Olšák maraeo at gmail.com
Thu Feb 9 11:21:46 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_cp_dma.c      | 2 +-
 src/gallium/drivers/radeonsi/si_descriptors.c | 3 ++-
 src/gallium/drivers/radeonsi/si_state.c       | 2 ++
 src/gallium/drivers/radeonsi/si_state.h       | 2 ++
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 9fa3ccb..ea999d9 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -427,21 +427,21 @@ static void cik_emit_prefetch_L2(struct si_context *sctx, struct r600_atom *atom
 	if (si_pm4_state_changed(sctx, gs))
 		cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
 	if (si_pm4_state_changed(sctx, vs))
 		cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
 
 	/* Vertex buffer descriptors are uploaded uncached, so prefetch
 	 * them right after the VS binary. */
 	if (sctx->vertex_buffer_pointer_dirty) {
 		cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
 					 sctx->vertex_buffers.buffer_offset,
-					 sctx->vertex_elements->count * 16);
+					 sctx->vertex_elements->desc_list_byte_size);
 	}
 	if (si_pm4_state_changed(sctx, ps))
 		cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
 }
 
 void si_init_cp_dma_functions(struct si_context *sctx)
 {
 	sctx->b.clear_buffer = si_clear_buffer;
 
 	si_init_atom(sctx, &sctx->prefetch_L2, &sctx->atoms.s.prefetch_L2,
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 9acc423..b0faf42 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -954,21 +954,22 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 	if (!sctx->vertex_buffers_dirty || !count || !velems)
 		return true;
 
 	unsigned fix_size3 = velems->fix_size3;
 	unsigned first_vb_use_mask = velems->first_vb_use_mask;
 
 	/* Vertex buffer descriptors are the only ones which are uploaded
 	 * directly through a staging buffer and don't go through
 	 * the fine-grained upload path.
 	 */
-	u_upload_alloc(sctx->b.uploader, 0, count * 16, 256, &desc->buffer_offset,
+	u_upload_alloc(sctx->b.uploader, 0, velems->desc_list_byte_size, 256,
+		       &desc->buffer_offset,
 		       (struct pipe_resource**)&desc->buffer, (void**)&ptr);
 	if (!desc->buffer)
 		return false;
 
 	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 			      desc->buffer, RADEON_USAGE_READ,
 			      RADEON_PRIO_DESCRIPTORS);
 
 	assert(count <= SI_NUM_VERTEX_BUFFERS);
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 5a163b1..1e0729c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3344,20 +3344,22 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
 {
 	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
 	bool used[SI_NUM_VERTEX_BUFFERS] = {};
 	int i;
 
 	assert(count <= SI_MAX_ATTRIBS);
 	if (!v)
 		return NULL;
 
 	v->count = count;
+	v->desc_list_byte_size = align(count * 16, SI_CPDMA_ALIGNMENT);
+
 	for (i = 0; i < count; ++i) {
 		const struct util_format_description *desc;
 		const struct util_format_channel_description *channel;
 		unsigned data_format, num_format;
 		int first_non_void;
 		unsigned vbo_index = elements[i].vertex_buffer_index;
 
 		if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
 			FREE(v);
 			return NULL;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 3f08f54..07b7d58 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -93,20 +93,22 @@ struct si_state_dsa {
 struct si_stencil_ref {
 	struct r600_atom		atom;
 	struct pipe_stencil_ref		state;
 	struct si_dsa_stencil_ref_part	dsa_part;
 };
 
 struct si_vertex_element
 {
 	unsigned			count;
 	unsigned			first_vb_use_mask;
+	/* Vertex buffer descriptor list size aligned for optimal prefetch. */
+	unsigned			desc_list_byte_size;
 
 	/* Two bits per attribute indicating the size of each vector component
 	 * in bytes if the size 3-workaround must be applied.
 	 */
 	uint32_t			fix_size3;
 	uint64_t			fix_fetch;
 
 	uint32_t			rsrc_word3[SI_MAX_ATTRIBS];
 	uint32_t			format_size[SI_MAX_ATTRIBS];
 	struct pipe_vertex_element	elements[SI_MAX_ATTRIBS];
-- 
2.7.4



More information about the mesa-dev mailing list