[Mesa-dev] [PATCH 2/2] radeonsi: update buffer descriptors in all contexts after buffer invalidation
Marek Olšák
maraeo at gmail.com
Fri May 10 05:19:56 UTC 2019
From: Marek Olšák <marek.olsak at amd.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108824
Cc: 19.1 <mesa-stable at lists.freedesktop.org>
---
src/gallium/drivers/radeonsi/si_descriptors.c | 94 ++++++++++++-------
src/gallium/drivers/radeonsi/si_pipe.h | 2 +
src/gallium/drivers/radeonsi/si_state_draw.c | 9 +-
3 files changed, 72 insertions(+), 33 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 744fc9a15d7..6a4dcacc0f3 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1580,242 +1580,272 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx)
si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]);
si_images_update_needs_color_decompress_mask(&sctx->images[i]);
si_update_shader_needs_decompress_mask(sctx, i);
}
si_resident_handles_update_needs_color_decompress(sctx);
}
/* BUFFER DISCARD/INVALIDATION */
-/** Reset descriptors of buffer resources after \p buf has been invalidated. */
+/* Reset descriptors of buffer resources after \p buf has been invalidated.
+ * If buf == NULL, reset all descriptors.
+ */
static void si_reset_buffer_resources(struct si_context *sctx,
struct si_buffer_resources *buffers,
unsigned descriptors_idx,
unsigned slot_mask,
struct pipe_resource *buf,
enum radeon_bo_priority priority)
{
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
unsigned mask = buffers->enabled_mask & slot_mask;
while (mask) {
unsigned i = u_bit_scan(&mask);
- if (buffers->buffers[i] == buf) {
- si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
+ struct pipe_resource *buffer = buffers->buffers[i];
+
+ if (buffer && (!buf || buffer == buf)) {
+ si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i],
descs->list + i*4);
sctx->descriptors_dirty |= 1u << descriptors_idx;
radeon_add_to_gfx_buffer_list_check_mem(sctx,
- si_resource(buf),
+ si_resource(buffer),
buffers->writable_mask & (1u << i) ?
RADEON_USAGE_READWRITE :
RADEON_USAGE_READ,
priority, true);
}
}
}
-/* Update all resource bindings where the buffer is bound, including
+/* Update all buffer bindings where the buffer is bound, including
* all resource descriptors. This is invalidate_buffer without
- * the invalidation. */
+ * the invalidation.
+ *
+ * If buf == NULL, update all buffer bindings.
+ */
void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
{
struct si_resource *buffer = si_resource(buf);
unsigned i, shader;
unsigned num_elems = sctx->vertex_elements ?
sctx->vertex_elements->count : 0;
/* We changed the buffer, now we need to bind it where the old one
* was bound. This consists of 2 things:
* 1) Updating the resource descriptor and dirtying it.
* 2) Adding a relocation to the CS, so that it's usable.
*/
/* Vertex buffers. */
- if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
+ if (!buffer) {
+ if (num_elems)
+ sctx->vertex_buffers_dirty = true;
+ } else if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
for (i = 0; i < num_elems; i++) {
int vb = sctx->vertex_elements->vertex_buffer_index[i];
if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
continue;
if (!sctx->vertex_buffer[vb].buffer.resource)
continue;
if (sctx->vertex_buffer[vb].buffer.resource == buf) {
sctx->vertex_buffers_dirty = true;
break;
}
}
}
/* Streamout buffers. (other internal buffers can't be invalidated) */
- if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
+ if (!buffer || buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
struct si_buffer_resources *buffers = &sctx->rw_buffers;
struct si_descriptors *descs =
&sctx->descriptors[SI_DESCS_RW_BUFFERS];
+ struct pipe_resource *buffer = buffers->buffers[i];
- if (buffers->buffers[i] != buf)
+ if (!buffer || (buf && buffer != buf))
continue;
- si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
+ si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i],
descs->list + i*4);
sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
radeon_add_to_gfx_buffer_list_check_mem(sctx,
- buffer, RADEON_USAGE_WRITE,
+ si_resource(buffer),
+ RADEON_USAGE_WRITE,
RADEON_PRIO_SHADER_RW_BUFFER,
true);
/* Update the streamout state. */
if (sctx->streamout.begin_emitted)
si_emit_streamout_end(sctx);
sctx->streamout.append_bitmask =
sctx->streamout.enabled_mask;
si_streamout_buffers_dirty(sctx);
}
}
/* Constant and shader buffers. */
- if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
+ if (!buffer || buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
for (shader = 0; shader < SI_NUM_SHADERS; shader++)
si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
si_const_and_shader_buffer_descriptors_idx(shader),
u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
buf,
sctx->const_and_shader_buffers[shader].priority_constbuf);
}
- if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
+ if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
for (shader = 0; shader < SI_NUM_SHADERS; shader++)
si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
si_const_and_shader_buffer_descriptors_idx(shader),
u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
buf,
sctx->const_and_shader_buffers[shader].priority);
}
- if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
+ if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
/* Texture buffers - update bindings. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_samplers *samplers = &sctx->samplers[shader];
struct si_descriptors *descs =
si_sampler_and_image_descriptors(sctx, shader);
unsigned mask = samplers->enabled_mask;
while (mask) {
unsigned i = u_bit_scan(&mask);
- if (samplers->views[i]->texture == buf) {
+ struct pipe_resource *buffer = samplers->views[i]->texture;
+
+ if (buffer && (!buf || buffer == buf)) {
unsigned desc_slot = si_get_sampler_slot(i);
- si_set_buf_desc_address(si_resource(buf),
+ si_set_buf_desc_address(si_resource(buffer),
samplers->views[i]->u.buf.offset,
descs->list + desc_slot * 16 + 4);
sctx->descriptors_dirty |=
1u << si_sampler_and_image_descriptors_idx(shader);
- radeon_add_to_gfx_buffer_list_check_mem(sctx,
- buffer, RADEON_USAGE_READ,
- RADEON_PRIO_SAMPLER_BUFFER,
- true);
+ radeon_add_to_gfx_buffer_list_check_mem(
+ sctx, si_resource(buffer),
+ RADEON_USAGE_READ,
+ RADEON_PRIO_SAMPLER_BUFFER, true);
}
}
}
}
/* Shader images */
- if (buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
+ if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
struct si_images *images = &sctx->images[shader];
struct si_descriptors *descs =
si_sampler_and_image_descriptors(sctx, shader);
unsigned mask = images->enabled_mask;
while (mask) {
unsigned i = u_bit_scan(&mask);
+ struct pipe_resource *buffer = images->views[i].resource;
- if (images->views[i].resource == buf) {
+ if (buffer && (!buf || buffer == buf)) {
unsigned desc_slot = si_get_image_slot(i);
if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
si_mark_image_range_valid(&images->views[i]);
- si_set_buf_desc_address(si_resource(buf),
+ si_set_buf_desc_address(si_resource(buffer),
images->views[i].u.buf.offset,
descs->list + desc_slot * 8 + 4);
sctx->descriptors_dirty |=
1u << si_sampler_and_image_descriptors_idx(shader);
radeon_add_to_gfx_buffer_list_check_mem(
- sctx, buffer,
+ sctx, si_resource(buffer),
RADEON_USAGE_READWRITE,
RADEON_PRIO_SAMPLER_BUFFER, true);
}
}
}
}
/* Bindless texture handles */
- if (buffer->texture_handle_allocated) {
+ if (!buffer || buffer->texture_handle_allocated) {
struct si_descriptors *descs = &sctx->bindless_descriptors;
util_dynarray_foreach(&sctx->resident_tex_handles,
struct si_texture_handle *, tex_handle) {
struct pipe_sampler_view *view = (*tex_handle)->view;
unsigned desc_slot = (*tex_handle)->desc_slot;
+ struct pipe_resource *buffer = view->texture;
- if (view->texture == buf) {
- si_set_buf_desc_address(buffer,
+ if (buffer && (!buf || buffer == buf)) {
+ si_set_buf_desc_address(si_resource(buffer),
view->u.buf.offset,
descs->list +
desc_slot * 16 + 4);
(*tex_handle)->desc_dirty = true;
sctx->bindless_descriptors_dirty = true;
radeon_add_to_gfx_buffer_list_check_mem(
- sctx, buffer,
+ sctx, si_resource(buffer),
RADEON_USAGE_READ,
RADEON_PRIO_SAMPLER_BUFFER, true);
}
}
}
/* Bindless image handles */
- if (buffer->image_handle_allocated) {
+ if (!buffer || buffer->image_handle_allocated) {
struct si_descriptors *descs = &sctx->bindless_descriptors;
util_dynarray_foreach(&sctx->resident_img_handles,
struct si_image_handle *, img_handle) {
struct pipe_image_view *view = &(*img_handle)->view;
unsigned desc_slot = (*img_handle)->desc_slot;
+ struct pipe_resource *buffer = view->resource;
- if (view->resource == buf) {
+ if (buffer && (!buf || buffer == buf)) {
if (view->access & PIPE_IMAGE_ACCESS_WRITE)
si_mark_image_range_valid(view);
- si_set_buf_desc_address(buffer,
+ si_set_buf_desc_address(si_resource(buffer),
view->u.buf.offset,
descs->list +
desc_slot * 16 + 4);
(*img_handle)->desc_dirty = true;
sctx->bindless_descriptors_dirty = true;
radeon_add_to_gfx_buffer_list_check_mem(
- sctx, buffer,
+ sctx, si_resource(buffer),
RADEON_USAGE_READWRITE,
RADEON_PRIO_SAMPLER_BUFFER, true);
}
}
}
+
+ if (buffer) {
+ /* Do the same for other contexts. They will invoke this function
+ * with buffer == NULL.
+ */
+ unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter);
+
+ /* Skip the update for the current context, because we have already updated
+ * the buffer bindings.
+ */
+ if (new_counter == sctx->last_dirty_buf_counter + 1)
+ sctx->last_dirty_buf_counter = new_counter;
+ }
}
static void si_upload_bindless_descriptor(struct si_context *sctx,
unsigned desc_slot,
unsigned num_dwords)
{
struct si_descriptors *desc = &sctx->bindless_descriptors;
unsigned desc_slot_offset = desc_slot * 16;
uint32_t *data;
uint64_t va;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index d3ddb912245..949fa0755cb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -519,20 +519,21 @@ struct si_screen {
struct si_perfcounters *perfcounters;
/* If pipe_screen wants to recompute and re-emit the framebuffer,
* sampler, and image states of all contexts, it should atomically
* increment this.
*
* Each context will compare this with its own last known value of
* the counter before drawing and re-emit the states accordingly.
*/
unsigned dirty_tex_counter;
+ unsigned dirty_buf_counter;
/* Atomically increment this counter when an existing texture's
* metadata is enabled or disabled in a way that requires changing
* contexts' compressed texture binding masks.
*/
unsigned compressed_colortex_counter;
struct {
/* Context flags to set so that all writes from earlier jobs
* in the CP are seen by L2 clients.
@@ -845,20 +846,21 @@ struct si_context {
bool has_graphics;
bool gfx_flush_in_progress:1;
bool gfx_last_ib_is_busy:1;
bool compute_is_busy:1;
unsigned num_gfx_cs_flushes;
unsigned initial_gfx_cs_size;
unsigned gpu_reset_counter;
unsigned last_dirty_tex_counter;
+ unsigned last_dirty_buf_counter;
unsigned last_compressed_colortex_counter;
unsigned last_num_draw_calls;
unsigned flags; /* flush flags */
/* Current unaccounted memory usage. */
uint64_t vram;
uint64_t gtt;
/* Atoms (direct states). */
union si_state_atoms atoms;
unsigned dirty_atoms; /* mask */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 8e01e1b35e1..d9dfef0a381 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1247,21 +1247,21 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
/* Emit draw states. */
si_emit_vs_state(sctx, info);
si_emit_draw_registers(sctx, info, num_patches);
}
static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct pipe_resource *indexbuf = info->index.resource;
- unsigned dirty_tex_counter;
+ unsigned dirty_tex_counter, dirty_buf_counter;
enum pipe_prim_type rast_prim;
unsigned index_size = info->index_size;
unsigned index_offset = info->indirect ? info->start * index_size : 0;
if (likely(!info->indirect)) {
/* SI-CI treat instance_count==0 as instance_count==1. There is
* no workaround for indirect draws, but we can at least skip
* direct draws.
*/
if (unlikely(!info->instance_count))
@@ -1285,20 +1285,27 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
dirty_tex_counter = p_atomic_read(&sctx->screen->dirty_tex_counter);
if (unlikely(dirty_tex_counter != sctx->last_dirty_tex_counter)) {
sctx->last_dirty_tex_counter = dirty_tex_counter;
sctx->framebuffer.dirty_cbufs |=
((1 << sctx->framebuffer.state.nr_cbufs) - 1);
sctx->framebuffer.dirty_zsbuf = true;
si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
si_update_all_texture_descriptors(sctx);
}
+ dirty_buf_counter = p_atomic_read(&sctx->screen->dirty_buf_counter);
+ if (unlikely(dirty_buf_counter != sctx->last_dirty_buf_counter)) {
+ sctx->last_dirty_buf_counter = dirty_buf_counter;
+ /* Rebind all buffers unconditionally. */
+ si_rebind_buffer(sctx, NULL);
+ }
+
si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
/* Set the rasterization primitive type.
*
* This must be done after si_decompress_textures, which can call
* draw_vbo recursively, and before si_update_shaders, which uses
* current_rast_prim for this draw_vbo call. */
if (sctx->gs_shader.cso)
rast_prim = sctx->gs_shader.cso->gs_output_prim;
else if (sctx->tes_shader.cso) {
--
2.17.1
More information about the mesa-dev
mailing list