[Mesa-dev] [PATCH 2/2] radeonsi: update buffer descriptors in all contexts after buffer invalidation

Juan A. Suarez Romero jasuarez at igalia.com
Fri May 17 07:56:17 UTC 2019


On Fri, 2019-05-10 at 01:19 -0400, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108824
> 
> Cc: 19.1 <mesa-stable at lists.freedesktop.org>

Hi, Marek,

This patch didn't apply cleanly on the 19.1 branch, so I've resolved the conflicts.

You can find the resolved patch at 
https://gitlab.freedesktop.org/mesa/mesa/commit/21620c889e3fc78be13f096fcccca273cfd27a3d


Thanks!


	J.A.

> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c | 94 ++++++++++++-------
>  src/gallium/drivers/radeonsi/si_pipe.h        |  2 +
>  src/gallium/drivers/radeonsi/si_state_draw.c  |  9 +-
>  3 files changed, 72 insertions(+), 33 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 744fc9a15d7..6a4dcacc0f3 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -1580,242 +1580,272 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx)
>  		si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]);
>  		si_images_update_needs_color_decompress_mask(&sctx->images[i]);
>  		si_update_shader_needs_decompress_mask(sctx, i);
>  	}
>  
>  	si_resident_handles_update_needs_color_decompress(sctx);
>  }
>  
>  /* BUFFER DISCARD/INVALIDATION */
>  
> -/** Reset descriptors of buffer resources after \p buf has been invalidated. */
> +/* Reset descriptors of buffer resources after \p buf has been invalidated.
> + * If buf == NULL, reset all descriptors.
> + */
>  static void si_reset_buffer_resources(struct si_context *sctx,
>  				      struct si_buffer_resources *buffers,
>  				      unsigned descriptors_idx,
>  				      unsigned slot_mask,
>  				      struct pipe_resource *buf,
>  				      enum radeon_bo_priority priority)
>  {
>  	struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
>  	unsigned mask = buffers->enabled_mask & slot_mask;
>  
>  	while (mask) {
>  		unsigned i = u_bit_scan(&mask);
> -		if (buffers->buffers[i] == buf) {
> -			si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
> +		struct pipe_resource *buffer = buffers->buffers[i];
> +
> +		if (buffer && (!buf || buffer == buf)) {
> +			si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i],
>  						descs->list + i*4);
>  			sctx->descriptors_dirty |= 1u << descriptors_idx;
>  
>  			radeon_add_to_gfx_buffer_list_check_mem(sctx,
> -								si_resource(buf),
> +								si_resource(buffer),
>  								buffers->writable_mask & (1u << i) ?
>  									RADEON_USAGE_READWRITE :
>  									RADEON_USAGE_READ,
>  								priority, true);
>  		}
>  	}
>  }
>  
> -/* Update all resource bindings where the buffer is bound, including
> +/* Update all buffer bindings where the buffer is bound, including
>   * all resource descriptors. This is invalidate_buffer without
> - * the invalidation. */
> + * the invalidation.
> + *
> + * If buf == NULL, update all buffer bindings.
> + */
>  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
>  {
>  	struct si_resource *buffer = si_resource(buf);
>  	unsigned i, shader;
>  	unsigned num_elems = sctx->vertex_elements ?
>  				       sctx->vertex_elements->count : 0;
>  
>  	/* We changed the buffer, now we need to bind it where the old one
>  	 * was bound. This consists of 2 things:
>  	 *   1) Updating the resource descriptor and dirtying it.
>  	 *   2) Adding a relocation to the CS, so that it's usable.
>  	 */
>  
>  	/* Vertex buffers. */
> -	if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
> +	if (!buffer) {
> +		if (num_elems)
> +			sctx->vertex_buffers_dirty = true;
> +	} else if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
>  		for (i = 0; i < num_elems; i++) {
>  			int vb = sctx->vertex_elements->vertex_buffer_index[i];
>  
>  			if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
>  				continue;
>  			if (!sctx->vertex_buffer[vb].buffer.resource)
>  				continue;
>  
>  			if (sctx->vertex_buffer[vb].buffer.resource == buf) {
>  				sctx->vertex_buffers_dirty = true;
>  				break;
>  			}
>  		}
>  	}
>  
>  	/* Streamout buffers. (other internal buffers can't be invalidated) */
> -	if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
> +	if (!buffer || buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
>  		for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
>  			struct si_buffer_resources *buffers = &sctx->rw_buffers;
>  			struct si_descriptors *descs =
>  				&sctx->descriptors[SI_DESCS_RW_BUFFERS];
> +			struct pipe_resource *buffer = buffers->buffers[i];
>  
> -			if (buffers->buffers[i] != buf)
> +			if (!buffer || (buf && buffer != buf))
>  				continue;
>  
> -			si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
> +			si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i],
>  						descs->list + i*4);
>  			sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
>  
>  			radeon_add_to_gfx_buffer_list_check_mem(sctx,
> -								buffer, RADEON_USAGE_WRITE,
> +								si_resource(buffer),
> +								RADEON_USAGE_WRITE,
>  								RADEON_PRIO_SHADER_RW_BUFFER,
>  								true);
>  
>  			/* Update the streamout state. */
>  			if (sctx->streamout.begin_emitted)
>  				si_emit_streamout_end(sctx);
>  			sctx->streamout.append_bitmask =
>  					sctx->streamout.enabled_mask;
>  			si_streamout_buffers_dirty(sctx);
>  		}
>  	}
>  
>  	/* Constant and shader buffers. */
> -	if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
> +	if (!buffer || buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
>  		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
>  			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
>  						  si_const_and_shader_buffer_descriptors_idx(shader),
>  						  u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
>  						  buf,
>  						  sctx->const_and_shader_buffers[shader].priority_constbuf);
>  	}
>  
> -	if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
> +	if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
>  		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
>  			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
>  						  si_const_and_shader_buffer_descriptors_idx(shader),
>  						  u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
>  						  buf,
>  						  sctx->const_and_shader_buffers[shader].priority);
>  	}
>  
> -	if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
> +	if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
>  		/* Texture buffers - update bindings. */
>  		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
>  			struct si_samplers *samplers = &sctx->samplers[shader];
>  			struct si_descriptors *descs =
>  				si_sampler_and_image_descriptors(sctx, shader);
>  			unsigned mask = samplers->enabled_mask;
>  
>  			while (mask) {
>  				unsigned i = u_bit_scan(&mask);
> -				if (samplers->views[i]->texture == buf) {
> +				struct pipe_resource *buffer = samplers->views[i]->texture;
> +
> +				if (buffer && (!buf || buffer == buf)) {
>  					unsigned desc_slot = si_get_sampler_slot(i);
>  
> -					si_set_buf_desc_address(si_resource(buf),
> +					si_set_buf_desc_address(si_resource(buffer),
>  								samplers->views[i]->u.buf.offset,
>  								descs->list + desc_slot * 16 + 4);
>  					sctx->descriptors_dirty |=
>  						1u << si_sampler_and_image_descriptors_idx(shader);
>  
> -					radeon_add_to_gfx_buffer_list_check_mem(sctx,
> -									    buffer, RADEON_USAGE_READ,
> -									    RADEON_PRIO_SAMPLER_BUFFER,
> -									    true);
> +					radeon_add_to_gfx_buffer_list_check_mem(
> +						sctx, si_resource(buffer),
> +						RADEON_USAGE_READ,
> +						RADEON_PRIO_SAMPLER_BUFFER, true);
>  				}
>  			}
>  		}
>  	}
>  
>  	/* Shader images */
> -	if (buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
> +	if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
>  		for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
>  			struct si_images *images = &sctx->images[shader];
>  			struct si_descriptors *descs =
>  				si_sampler_and_image_descriptors(sctx, shader);
>  			unsigned mask = images->enabled_mask;
>  
>  			while (mask) {
>  				unsigned i = u_bit_scan(&mask);
> +				struct pipe_resource *buffer = images->views[i].resource;
>  
> -				if (images->views[i].resource == buf) {
> +				if (buffer && (!buf || buffer == buf)) {
>  					unsigned desc_slot = si_get_image_slot(i);
>  
>  					if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
>  						si_mark_image_range_valid(&images->views[i]);
>  
> -					si_set_buf_desc_address(si_resource(buf),
> +					si_set_buf_desc_address(si_resource(buffer),
>  								images->views[i].u.buf.offset,
>  								descs->list + desc_slot * 8 + 4);
>  					sctx->descriptors_dirty |=
>  						1u << si_sampler_and_image_descriptors_idx(shader);
>  
>  					radeon_add_to_gfx_buffer_list_check_mem(
> -						sctx, buffer,
> +						sctx, si_resource(buffer),
>  						RADEON_USAGE_READWRITE,
>  						RADEON_PRIO_SAMPLER_BUFFER, true);
>  				}
>  			}
>  		}
>  	}
>  
>  	/* Bindless texture handles */
> -	if (buffer->texture_handle_allocated) {
> +	if (!buffer || buffer->texture_handle_allocated) {
>  		struct si_descriptors *descs = &sctx->bindless_descriptors;
>  
>  		util_dynarray_foreach(&sctx->resident_tex_handles,
>  				      struct si_texture_handle *, tex_handle) {
>  			struct pipe_sampler_view *view = (*tex_handle)->view;
>  			unsigned desc_slot = (*tex_handle)->desc_slot;
> +			struct pipe_resource *buffer = view->texture;
>  
> -			if (view->texture == buf) {
> -				si_set_buf_desc_address(buffer,
> +			if (buffer && (!buf || buffer == buf)) {
> +				si_set_buf_desc_address(si_resource(buffer),
>  							view->u.buf.offset,
>  							descs->list +
>  							desc_slot * 16 + 4);
>  
>  				(*tex_handle)->desc_dirty = true;
>  				sctx->bindless_descriptors_dirty = true;
>  
>  				radeon_add_to_gfx_buffer_list_check_mem(
> -					sctx, buffer,
> +					sctx, si_resource(buffer),
>  					RADEON_USAGE_READ,
>  					RADEON_PRIO_SAMPLER_BUFFER, true);
>  			}
>  		}
>  	}
>  
>  	/* Bindless image handles */
> -	if (buffer->image_handle_allocated) {
> +	if (!buffer || buffer->image_handle_allocated) {
>  		struct si_descriptors *descs = &sctx->bindless_descriptors;
>  
>  		util_dynarray_foreach(&sctx->resident_img_handles,
>  				      struct si_image_handle *, img_handle) {
>  			struct pipe_image_view *view = &(*img_handle)->view;
>  			unsigned desc_slot = (*img_handle)->desc_slot;
> +			struct pipe_resource *buffer = view->resource;
>  
> -			if (view->resource == buf) {
> +			if (buffer && (!buf || buffer == buf)) {
>  				if (view->access & PIPE_IMAGE_ACCESS_WRITE)
>  					si_mark_image_range_valid(view);
>  
> -				si_set_buf_desc_address(buffer,
> +				si_set_buf_desc_address(si_resource(buffer),
>  							view->u.buf.offset,
>  							descs->list +
>  							desc_slot * 16 + 4);
>  
>  				(*img_handle)->desc_dirty = true;
>  				sctx->bindless_descriptors_dirty = true;
>  
>  				radeon_add_to_gfx_buffer_list_check_mem(
> -					sctx, buffer,
> +					sctx, si_resource(buffer),
>  					RADEON_USAGE_READWRITE,
>  					RADEON_PRIO_SAMPLER_BUFFER, true);
>  			}
>  		}
>  	}
> +
> +	if (buffer) {
> +		/* Do the same for other contexts. They will invoke this function
> +		 * with buffer == NULL.
> +		 */
> +		unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter);
> +
> +		/* Skip the update for the current context, because we have already updated
> +		 * the buffer bindings.
> +		 */
> +		if (new_counter == sctx->last_dirty_buf_counter + 1)
> +			sctx->last_dirty_buf_counter = new_counter;
> +	}
>  }
>  
>  static void si_upload_bindless_descriptor(struct si_context *sctx,
>  					  unsigned desc_slot,
>  					  unsigned num_dwords)
>  {
>  	struct si_descriptors *desc = &sctx->bindless_descriptors;
>  	unsigned desc_slot_offset = desc_slot * 16;
>  	uint32_t *data;
>  	uint64_t va;
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index d3ddb912245..949fa0755cb 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -519,20 +519,21 @@ struct si_screen {
>  	struct si_perfcounters	*perfcounters;
>  
>  	/* If pipe_screen wants to recompute and re-emit the framebuffer,
>  	 * sampler, and image states of all contexts, it should atomically
>  	 * increment this.
>  	 *
>  	 * Each context will compare this with its own last known value of
>  	 * the counter before drawing and re-emit the states accordingly.
>  	 */
>  	unsigned			dirty_tex_counter;
> +	unsigned			dirty_buf_counter;
>  
>  	/* Atomically increment this counter when an existing texture's
>  	 * metadata is enabled or disabled in a way that requires changing
>  	 * contexts' compressed texture binding masks.
>  	 */
>  	unsigned			compressed_colortex_counter;
>  
>  	struct {
>  		/* Context flags to set so that all writes from earlier jobs
>  		 * in the CP are seen by L2 clients.
> @@ -845,20 +846,21 @@ struct si_context {
>  
>  	bool				has_graphics;
>  	bool				gfx_flush_in_progress:1;
>  	bool				gfx_last_ib_is_busy:1;
>  	bool				compute_is_busy:1;
>  
>  	unsigned			num_gfx_cs_flushes;
>  	unsigned			initial_gfx_cs_size;
>  	unsigned			gpu_reset_counter;
>  	unsigned			last_dirty_tex_counter;
> +	unsigned			last_dirty_buf_counter;
>  	unsigned			last_compressed_colortex_counter;
>  	unsigned			last_num_draw_calls;
>  	unsigned			flags; /* flush flags */
>  	/* Current unaccounted memory usage. */
>  	uint64_t			vram;
>  	uint64_t			gtt;
>  
>  	/* Atoms (direct states). */
>  	union si_state_atoms		atoms;
>  	unsigned			dirty_atoms; /* mask */
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 8e01e1b35e1..d9dfef0a381 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -1247,21 +1247,21 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
>  	/* Emit draw states. */
>  	si_emit_vs_state(sctx, info);
>  	si_emit_draw_registers(sctx, info, num_patches);
>  }
>  
>  static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
>  {
>  	struct si_context *sctx = (struct si_context *)ctx;
>  	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
>  	struct pipe_resource *indexbuf = info->index.resource;
> -	unsigned dirty_tex_counter;
> +	unsigned dirty_tex_counter, dirty_buf_counter;
>  	enum pipe_prim_type rast_prim;
>  	unsigned index_size = info->index_size;
>  	unsigned index_offset = info->indirect ? info->start * index_size : 0;
>  
>  	if (likely(!info->indirect)) {
>  		/* SI-CI treat instance_count==0 as instance_count==1. There is
>  		 * no workaround for indirect draws, but we can at least skip
>  		 * direct draws.
>  		 */
>  		if (unlikely(!info->instance_count))
> @@ -1285,20 +1285,27 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
>  	dirty_tex_counter = p_atomic_read(&sctx->screen->dirty_tex_counter);
>  	if (unlikely(dirty_tex_counter != sctx->last_dirty_tex_counter)) {
>  		sctx->last_dirty_tex_counter = dirty_tex_counter;
>  		sctx->framebuffer.dirty_cbufs |=
>  			((1 << sctx->framebuffer.state.nr_cbufs) - 1);
>  		sctx->framebuffer.dirty_zsbuf = true;
>  		si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
>  		si_update_all_texture_descriptors(sctx);
>  	}
>  
> +	dirty_buf_counter = p_atomic_read(&sctx->screen->dirty_buf_counter);
> +	if (unlikely(dirty_buf_counter != sctx->last_dirty_buf_counter)) {
> +		sctx->last_dirty_buf_counter = dirty_buf_counter;
> +		/* Rebind all buffers unconditionally. */
> +		si_rebind_buffer(sctx, NULL);
> +	}
> +
>  	si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
>  
>  	/* Set the rasterization primitive type.
>  	 *
>  	 * This must be done after si_decompress_textures, which can call
>  	 * draw_vbo recursively, and before si_update_shaders, which uses
>  	 * current_rast_prim for this draw_vbo call. */
>  	if (sctx->gs_shader.cso)
>  		rast_prim = sctx->gs_shader.cso->gs_output_prim;
>  	else if (sctx->tes_shader.cso) {



More information about the mesa-dev mailing list