[Mesa-dev] [PATCH 11/11] radeonsi: determine in advance which VBOs should be added to the buffer list

Nicolai Hähnle nhaehnle at gmail.com
Wed Jan 18 12:56:10 UTC 2017


Some nice improvements in there. I have one minor stylistic remark on patch #8;
apart from that, patches 2-11 are

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

On 18.01.2017 03:11, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> v2: now it should be correct
> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c | 8 ++++----
>  src/gallium/drivers/radeonsi/si_state.c       | 6 ++++++
>  src/gallium/drivers/radeonsi/si_state.h       | 1 +
>  3 files changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 837f393..391c58b 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -932,29 +932,29 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
>  		return;
>  	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>  			      desc->buffer, RADEON_USAGE_READ,
>  			      RADEON_PRIO_DESCRIPTORS);
>  }
>
>  bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
>  {
>  	struct si_vertex_element *velems = sctx->vertex_elements;
>  	struct si_descriptors *desc = &sctx->vertex_buffers;
> -	bool bound[SI_NUM_VERTEX_BUFFERS] = {};
>  	unsigned i, count = velems->count;
>  	uint64_t va;
>  	uint32_t *ptr;
>
>  	if (!sctx->vertex_buffers_dirty || !count || !velems)
>  		return true;
>
>  	unsigned fix_size3 = velems->fix_size3;
> +	unsigned first_vb_use_mask = velems->first_vb_use_mask;
>
>  	/* Vertex buffer descriptors are the only ones which are uploaded
>  	 * directly through a staging buffer and don't go through
>  	 * the fine-grained upload path.
>  	 */
>  	u_upload_alloc(sctx->b.uploader, 0, count * 16, 256, &desc->buffer_offset,
>  		       (struct pipe_resource**)&desc->buffer, (void**)&ptr);
>  	if (!desc->buffer)
>  		return false;
>
> @@ -962,23 +962,24 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
>  			      desc->buffer, RADEON_USAGE_READ,
>  			      RADEON_PRIO_DESCRIPTORS);
>
>  	assert(count <= SI_NUM_VERTEX_BUFFERS);
>
>  	for (i = 0; i < count; i++) {
>  		struct pipe_vertex_element *ve = &velems->elements[i];
>  		struct pipe_vertex_buffer *vb;
>  		struct r600_resource *rbuffer;
>  		unsigned offset;
> +		unsigned vbo_index = ve->vertex_buffer_index;
>  		uint32_t *desc = &ptr[i*4];
>
> -		vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
> +		vb = &sctx->vertex_buffer[vbo_index];
>  		rbuffer = (struct r600_resource*)vb->buffer;
>  		if (!rbuffer) {
>  			memset(desc, 0, 16);
>  			continue;
>  		}
>
>  		offset = vb->buffer_offset + ve->src_offset;
>  		va = rbuffer->gpu_address + offset;
>
>  		/* Fill in T# buffer resource description */
> @@ -1011,25 +1012,24 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
>  			size3 = (fix_size3 >> (2 * i)) & 3;
>  			if (vb->stride && size3) {
>  				assert(offset % 4 == 0 && vb->stride % 4 == 0);
>  				assert(size3 <= 2);
>  				desc[2] = align(desc[2], size3 * 2);
>  			}
>  		}
>
>  		desc[3] = velems->rsrc_word3[i];
>
> -		if (!bound[ve->vertex_buffer_index]) {
> +		if (first_vb_use_mask & (1 << i)) {
>  			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>  					      (struct r600_resource*)vb->buffer,
>  					      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
> -			bound[ve->vertex_buffer_index] = true;
>  		}
>  	}
>
>  	/* Don't flush the const cache. It would have a very negative effect
>  	 * on performance (confirmed by testing). New descriptors are always
>  	 * uploaded to a fresh new buffer, so I don't think flushing the const
>  	 * cache is needed. */
>  	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
>  	sctx->vertex_buffers_dirty = false;
>  	sctx->vertex_buffer_pointer_dirty = true;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 3022260..f60a499 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -3316,39 +3316,45 @@ static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
>
>  /*
>   * Vertex elements & buffers
>   */
>
>  static void *si_create_vertex_elements(struct pipe_context *ctx,
>  				       unsigned count,
>  				       const struct pipe_vertex_element *elements)
>  {
>  	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
> +	bool used[SI_NUM_VERTEX_BUFFERS] = {};
>  	int i;
>
>  	assert(count <= SI_MAX_ATTRIBS);
>  	if (!v)
>  		return NULL;
>
>  	v->count = count;
>  	for (i = 0; i < count; ++i) {
>  		const struct util_format_description *desc;
>  		const struct util_format_channel_description *channel;
>  		unsigned data_format, num_format;
>  		int first_non_void;
>  		unsigned vbo_index = elements[i].vertex_buffer_index;
>
>  		if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
>  			FREE(v);
>  			return NULL;
>  		}
>
> +		if (!used[vbo_index]) {
> +			v->first_vb_use_mask |= 1 << i;
> +			used[vbo_index] = true;
> +		}
> +
>  		desc = util_format_description(elements[i].src_format);
>  		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
>  		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
>  		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
>  		channel = &desc->channel[first_non_void];
>
>  		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
>  				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
>  				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
>  				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 34a0f57..03e5011 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -92,20 +92,21 @@ struct si_state_dsa {
>
>  struct si_stencil_ref {
>  	struct r600_atom		atom;
>  	struct pipe_stencil_ref		state;
>  	struct si_dsa_stencil_ref_part	dsa_part;
>  };
>
>  struct si_vertex_element
>  {
>  	unsigned			count;
> +	unsigned			first_vb_use_mask;
>
>  	/* Two bits per attribute indicating the size of each vector component
>  	 * in bytes if the size 3-workaround must be applied.
>  	 */
>  	uint32_t			fix_size3;
>  	uint64_t			fix_fetch;
>
>  	uint32_t			rsrc_word3[SI_MAX_ATTRIBS];
>  	uint32_t			format_size[SI_MAX_ATTRIBS];
>  	struct pipe_vertex_element	elements[SI_MAX_ATTRIBS];
>


More information about the mesa-dev mailing list