[Mesa-dev] [PATCH 08/19] radeonsi: optimize si_invalidate_buffer based on bind_history
Nicolai Hähnle
nhaehnle at gmail.com
Tue Oct 4 09:27:54 UTC 2016
On 02.10.2016 23:09, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> Just enclose each section with: if (rbuffer->bind_history & PIPE_BIND_...)
>
> Bioshock Infinite: +1% performance
> ---
> src/gallium/drivers/radeonsi/si_descriptors.c | 191 ++++++++++++++------------
> 1 file changed, 101 insertions(+), 90 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 43bef81..b60cd61 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -1496,128 +1496,139 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
> /* Reallocate the buffer in the same pipe_resource. */
> r600_alloc_resource(&sctx->screen->b, rbuffer);
>
> /* We changed the buffer, now we need to bind it where the old one
> * was bound. This consists of 2 things:
> * 1) Updating the resource descriptor and dirtying it.
> * 2) Adding a relocation to the CS, so that it's usable.
> */
>
> /* Vertex buffers. */
> - for (i = 0; i < num_elems; i++) {
> - int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
> + if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
> + for (i = 0; i < num_elems; i++) {
> + int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
>
> - if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
> - continue;
> - if (!sctx->vertex_buffer[vb].buffer)
> - continue;
> + if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
> + continue;
> + if (!sctx->vertex_buffer[vb].buffer)
> + continue;
>
> - if (sctx->vertex_buffer[vb].buffer == buf) {
> - sctx->vertex_buffers_dirty = true;
> - break;
> + if (sctx->vertex_buffer[vb].buffer == buf) {
> + sctx->vertex_buffers_dirty = true;
> + break;
> + }
> }
> }
>
> /* Streamout buffers. (other internal buffers can't be invalidated) */
> - for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
> - struct si_buffer_resources *buffers = &sctx->rw_buffers;
> - struct si_descriptors *descs =
> - &sctx->descriptors[SI_DESCS_RW_BUFFERS];
> + if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
> + for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
> + struct si_buffer_resources *buffers = &sctx->rw_buffers;
> + struct si_descriptors *descs =
> + &sctx->descriptors[SI_DESCS_RW_BUFFERS];
>
> - if (buffers->buffers[i] != buf)
> - continue;
> -
> - si_desc_reset_buffer_offset(ctx, descs->list + i*4,
> - old_va, buf);
> - descs->dirty_mask |= 1u << i;
> - sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
> + if (buffers->buffers[i] != buf)
> + continue;
>
> - radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
> - rbuffer, buffers->shader_usage,
> - RADEON_PRIO_SHADER_RW_BUFFER,
> - true);
> -
> - /* Update the streamout state. */
> - if (sctx->b.streamout.begin_emitted)
> - r600_emit_streamout_end(&sctx->b);
> - sctx->b.streamout.append_bitmask =
> - sctx->b.streamout.enabled_mask;
> - r600_streamout_buffers_dirty(&sctx->b);
> - }
> + si_desc_reset_buffer_offset(ctx, descs->list + i*4,
> + old_va, buf);
> + descs->dirty_mask |= 1u << i;
> + sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
>
> - /* Constant and shader buffers. */
> - for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> - si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
> - si_const_buffer_descriptors_idx(shader),
> - buf, old_va);
> - si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
> - si_shader_buffer_descriptors_idx(shader),
> - buf, old_va);
> - }
> + radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
> + rbuffer, buffers->shader_usage,
> + RADEON_PRIO_SHADER_RW_BUFFER,
> + true);
>
> - /* Texture buffers - update virtual addresses in sampler view descriptors. */
> - LIST_FOR_EACH_ENTRY(view, &sctx->b.texture_buffers, list) {
> - if (view->base.texture == buf) {
> - si_desc_reset_buffer_offset(ctx, &view->state[4], old_va, buf);
> + /* Update the streamout state. */
> + if (sctx->b.streamout.begin_emitted)
> + r600_emit_streamout_end(&sctx->b);
> + sctx->b.streamout.append_bitmask =
> + sctx->b.streamout.enabled_mask;
> + r600_streamout_buffers_dirty(&sctx->b);
> }
> }
> - /* Texture buffers - update bindings. */
> - for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> - struct si_sampler_views *views = &sctx->samplers[shader].views;
> - struct si_descriptors *descs =
> - si_sampler_descriptors(sctx, shader);
> - unsigned mask = views->enabled_mask;
>
> - while (mask) {
> - unsigned i = u_bit_scan(&mask);
> - if (views->views[i]->texture == buf) {
> - si_desc_reset_buffer_offset(ctx,
> - descs->list +
> - i * 16 + 4,
> - old_va, buf);
> - descs->dirty_mask |= 1u << i;
> - sctx->descriptors_dirty |=
> - 1u << si_sampler_descriptors_idx(shader);
> -
> - radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
> - rbuffer, RADEON_USAGE_READ,
> - RADEON_PRIO_SAMPLER_BUFFER,
> - true);
> + /* Constant and shader buffers. */
> + if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER)
> + for (shader = 0; shader < SI_NUM_SHADERS; shader++)
> + si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
> + si_const_buffer_descriptors_idx(shader),
> + buf, old_va);
> +
> + if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER)
> + for (shader = 0; shader < SI_NUM_SHADERS; shader++)
> + si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
> + si_shader_buffer_descriptors_idx(shader),
> + buf, old_va);
Please put braces around multi-line blocks, i.e. the if/for bodies above
that span several lines even though each is a single statement. Not having
them is an unnecessary source of silly mistakes.
With that changed, patches 1-8 are
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> +
> + if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
> + /* Texture buffers - update virtual addresses in sampler view descriptors. */
> + LIST_FOR_EACH_ENTRY(view, &sctx->b.texture_buffers, list) {
> + if (view->base.texture == buf) {
> + si_desc_reset_buffer_offset(ctx, &view->state[4], old_va, buf);
> + }
> + }
> + /* Texture buffers - update bindings. */
> + for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> + struct si_sampler_views *views = &sctx->samplers[shader].views;
> + struct si_descriptors *descs =
> + si_sampler_descriptors(sctx, shader);
> + unsigned mask = views->enabled_mask;
> +
> + while (mask) {
> + unsigned i = u_bit_scan(&mask);
> + if (views->views[i]->texture == buf) {
> + si_desc_reset_buffer_offset(ctx,
> + descs->list +
> + i * 16 + 4,
> + old_va, buf);
> + descs->dirty_mask |= 1u << i;
> + sctx->descriptors_dirty |=
> + 1u << si_sampler_descriptors_idx(shader);
> +
> + radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
> + rbuffer, RADEON_USAGE_READ,
> + RADEON_PRIO_SAMPLER_BUFFER,
> + true);
> + }
> }
> }
> }
>
> /* Shader images */
> - for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
> - struct si_images_info *images = &sctx->images[shader];
> - struct si_descriptors *descs =
> - si_image_descriptors(sctx, shader);
> - unsigned mask = images->enabled_mask;
> -
> - while (mask) {
> - unsigned i = u_bit_scan(&mask);
> -
> - if (images->views[i].resource == buf) {
> - if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
> - si_mark_image_range_valid(&images->views[i]);
> -
> - si_desc_reset_buffer_offset(
> - ctx, descs->list + i * 8 + 4,
> - old_va, buf);
> - descs->dirty_mask |= 1u << i;
> - sctx->descriptors_dirty |=
> - 1u << si_image_descriptors_idx(shader);
> -
> - radeon_add_to_buffer_list_check_mem(
> - &sctx->b, &sctx->b.gfx, rbuffer,
> - RADEON_USAGE_READWRITE,
> - RADEON_PRIO_SAMPLER_BUFFER, true);
> + if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
> + for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
> + struct si_images_info *images = &sctx->images[shader];
> + struct si_descriptors *descs =
> + si_image_descriptors(sctx, shader);
> + unsigned mask = images->enabled_mask;
> +
> + while (mask) {
> + unsigned i = u_bit_scan(&mask);
> +
> + if (images->views[i].resource == buf) {
> + if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
> + si_mark_image_range_valid(&images->views[i]);
> +
> + si_desc_reset_buffer_offset(
> + ctx, descs->list + i * 8 + 4,
> + old_va, buf);
> + descs->dirty_mask |= 1u << i;
> + sctx->descriptors_dirty |=
> + 1u << si_image_descriptors_idx(shader);
> +
> + radeon_add_to_buffer_list_check_mem(
> + &sctx->b, &sctx->b.gfx, rbuffer,
> + RADEON_USAGE_READWRITE,
> + RADEON_PRIO_SAMPLER_BUFFER, true);
> + }
> }
> }
> }
> }
>
> /* Update mutable image descriptor fields of all bound textures. */
> void si_update_all_texture_descriptors(struct si_context *sctx)
> {
> unsigned shader;
>
>
More information about the mesa-dev
mailing list