[Mesa-dev] [PATCH 08/19] radeonsi: optimize si_invalidate_buffer based on bind_history

Nicolai Hähnle nhaehnle at gmail.com
Tue Oct 4 09:27:54 UTC 2016


On 02.10.2016 23:09, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> Just enclose each section with: if (rbuffer->bind_history & PIPE_BIND_...)
>
> Bioshock Infinite: +1% performance
> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c | 191 ++++++++++++++------------
>  1 file changed, 101 insertions(+), 90 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 43bef81..b60cd61 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -1496,128 +1496,139 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
>  	/* Reallocate the buffer in the same pipe_resource. */
>  	r600_alloc_resource(&sctx->screen->b, rbuffer);
>
>  	/* We changed the buffer, now we need to bind it where the old one
>  	 * was bound. This consists of 2 things:
>  	 *   1) Updating the resource descriptor and dirtying it.
>  	 *   2) Adding a relocation to the CS, so that it's usable.
>  	 */
>
>  	/* Vertex buffers. */
> -	for (i = 0; i < num_elems; i++) {
> -		int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
> +	if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
> +		for (i = 0; i < num_elems; i++) {
> +			int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
>
> -		if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
> -			continue;
> -		if (!sctx->vertex_buffer[vb].buffer)
> -			continue;
> +			if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
> +				continue;
> +			if (!sctx->vertex_buffer[vb].buffer)
> +				continue;
>
> -		if (sctx->vertex_buffer[vb].buffer == buf) {
> -			sctx->vertex_buffers_dirty = true;
> -			break;
> +			if (sctx->vertex_buffer[vb].buffer == buf) {
> +				sctx->vertex_buffers_dirty = true;
> +				break;
> +			}
>  		}
>  	}
>
>  	/* Streamout buffers. (other internal buffers can't be invalidated) */
> -	for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
> -		struct si_buffer_resources *buffers = &sctx->rw_buffers;
> -		struct si_descriptors *descs =
> -			&sctx->descriptors[SI_DESCS_RW_BUFFERS];
> +	if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
> +		for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
> +			struct si_buffer_resources *buffers = &sctx->rw_buffers;
> +			struct si_descriptors *descs =
> +				&sctx->descriptors[SI_DESCS_RW_BUFFERS];
>
> -		if (buffers->buffers[i] != buf)
> -			continue;
> -
> -		si_desc_reset_buffer_offset(ctx, descs->list + i*4,
> -					    old_va, buf);
> -		descs->dirty_mask |= 1u << i;
> -		sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
> +			if (buffers->buffers[i] != buf)
> +				continue;
>
> -		radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
> -						    rbuffer, buffers->shader_usage,
> -						    RADEON_PRIO_SHADER_RW_BUFFER,
> -						    true);
> -
> -		/* Update the streamout state. */
> -		if (sctx->b.streamout.begin_emitted)
> -			r600_emit_streamout_end(&sctx->b);
> -		sctx->b.streamout.append_bitmask =
> -				sctx->b.streamout.enabled_mask;
> -		r600_streamout_buffers_dirty(&sctx->b);
> -	}
> +			si_desc_reset_buffer_offset(ctx, descs->list + i*4,
> +						    old_va, buf);
> +			descs->dirty_mask |= 1u << i;
> +			sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
>
> -	/* Constant and shader buffers. */
> -	for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> -		si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
> -					  si_const_buffer_descriptors_idx(shader),
> -					  buf, old_va);
> -		si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
> -					  si_shader_buffer_descriptors_idx(shader),
> -					  buf, old_va);
> -	}
> +			radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
> +							    rbuffer, buffers->shader_usage,
> +							    RADEON_PRIO_SHADER_RW_BUFFER,
> +							    true);
>
> -	/* Texture buffers - update virtual addresses in sampler view descriptors. */
> -	LIST_FOR_EACH_ENTRY(view, &sctx->b.texture_buffers, list) {
> -		if (view->base.texture == buf) {
> -			si_desc_reset_buffer_offset(ctx, &view->state[4], old_va, buf);
> +			/* Update the streamout state. */
> +			if (sctx->b.streamout.begin_emitted)
> +				r600_emit_streamout_end(&sctx->b);
> +			sctx->b.streamout.append_bitmask =
> +					sctx->b.streamout.enabled_mask;
> +			r600_streamout_buffers_dirty(&sctx->b);
>  		}
>  	}
> -	/* Texture buffers - update bindings. */
> -	for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> -		struct si_sampler_views *views = &sctx->samplers[shader].views;
> -		struct si_descriptors *descs =
> -			si_sampler_descriptors(sctx, shader);
> -		unsigned mask = views->enabled_mask;
>
> -		while (mask) {
> -			unsigned i = u_bit_scan(&mask);
> -			if (views->views[i]->texture == buf) {
> -				si_desc_reset_buffer_offset(ctx,
> -							    descs->list +
> -							    i * 16 + 4,
> -							    old_va, buf);
> -				descs->dirty_mask |= 1u << i;
> -				sctx->descriptors_dirty |=
> -					1u << si_sampler_descriptors_idx(shader);
> -
> -				radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
> -								    rbuffer, RADEON_USAGE_READ,
> -								    RADEON_PRIO_SAMPLER_BUFFER,
> -								    true);
> +	/* Constant and shader buffers. */
> +	if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER)
> +		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
> +			si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
> +						  si_const_buffer_descriptors_idx(shader),
> +						  buf, old_va);
> +
> +	if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER)
> +		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
> +			si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
> +						  si_shader_buffer_descriptors_idx(shader),
> +						  buf, old_va);

Please put braces around multi-line blocks, i.e. the brace-less `if` and
`for` bodies just above, whose nested statement spans several lines. Not
having them is an unnecessary source of silly mistakes.

With that changed, patches 1-8 are

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

> +
> +	if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
> +		/* Texture buffers - update virtual addresses in sampler view descriptors. */
> +		LIST_FOR_EACH_ENTRY(view, &sctx->b.texture_buffers, list) {
> +			if (view->base.texture == buf) {
> +				si_desc_reset_buffer_offset(ctx, &view->state[4], old_va, buf);
> +			}
> +		}
> +		/* Texture buffers - update bindings. */
> +		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> +			struct si_sampler_views *views = &sctx->samplers[shader].views;
> +			struct si_descriptors *descs =
> +				si_sampler_descriptors(sctx, shader);
> +			unsigned mask = views->enabled_mask;
> +
> +			while (mask) {
> +				unsigned i = u_bit_scan(&mask);
> +				if (views->views[i]->texture == buf) {
> +					si_desc_reset_buffer_offset(ctx,
> +								    descs->list +
> +								    i * 16 + 4,
> +								    old_va, buf);
> +					descs->dirty_mask |= 1u << i;
> +					sctx->descriptors_dirty |=
> +						1u << si_sampler_descriptors_idx(shader);
> +
> +					radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
> +									    rbuffer, RADEON_USAGE_READ,
> +									    RADEON_PRIO_SAMPLER_BUFFER,
> +									    true);
> +				}
>  			}
>  		}
>  	}
>
>  	/* Shader images */
> -	for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
> -		struct si_images_info *images = &sctx->images[shader];
> -		struct si_descriptors *descs =
> -			si_image_descriptors(sctx, shader);
> -		unsigned mask = images->enabled_mask;
> -
> -		while (mask) {
> -			unsigned i = u_bit_scan(&mask);
> -
> -			if (images->views[i].resource == buf) {
> -				if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
> -					si_mark_image_range_valid(&images->views[i]);
> -
> -				si_desc_reset_buffer_offset(
> -					ctx, descs->list + i * 8 + 4,
> -					old_va, buf);
> -				descs->dirty_mask |= 1u << i;
> -				sctx->descriptors_dirty |=
> -					1u << si_image_descriptors_idx(shader);
> -
> -				radeon_add_to_buffer_list_check_mem(
> -					&sctx->b, &sctx->b.gfx, rbuffer,
> -					RADEON_USAGE_READWRITE,
> -					RADEON_PRIO_SAMPLER_BUFFER, true);
> +	if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
> +		for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
> +			struct si_images_info *images = &sctx->images[shader];
> +			struct si_descriptors *descs =
> +				si_image_descriptors(sctx, shader);
> +			unsigned mask = images->enabled_mask;
> +
> +			while (mask) {
> +				unsigned i = u_bit_scan(&mask);
> +
> +				if (images->views[i].resource == buf) {
> +					if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
> +						si_mark_image_range_valid(&images->views[i]);
> +
> +					si_desc_reset_buffer_offset(
> +						ctx, descs->list + i * 8 + 4,
> +						old_va, buf);
> +					descs->dirty_mask |= 1u << i;
> +					sctx->descriptors_dirty |=
> +						1u << si_image_descriptors_idx(shader);
> +
> +					radeon_add_to_buffer_list_check_mem(
> +						&sctx->b, &sctx->b.gfx, rbuffer,
> +						RADEON_USAGE_READWRITE,
> +						RADEON_PRIO_SAMPLER_BUFFER, true);
> +				}
>  			}
>  		}
>  	}
>  }
>
>  /* Update mutable image descriptor fields of all bound textures. */
>  void si_update_all_texture_descriptors(struct si_context *sctx)
>  {
>  	unsigned shader;
>
>


More information about the mesa-dev mailing list