[Mesa-dev] [PATCH v2 10/12] radeonsi: Replace list_dirty with a mask.
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Sat Apr 16 23:43:07 UTC 2016
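Tracking dirtiness per descriptor in a 64-bit mask, instead of a single flag for the whole list, means we can then upload only the dirty descriptors with the constant engine.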
Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
---
src/gallium/drivers/radeonsi/si_descriptors.c | 37 ++++++++++++++++-----------
src/gallium/drivers/radeonsi/si_state.h | 9 +++++--
2 files changed, 29 insertions(+), 17 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 0b44ecf..8ca0253 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -109,7 +109,7 @@ static void si_init_descriptors(struct si_descriptors *desc,
desc->list = CALLOC(num_elements, element_dw_size * 4);
desc->element_dw_size = element_dw_size;
desc->num_elements = num_elements;
- desc->list_dirty = true; /* upload the list before the next draw */
+ desc->dirty_mask = num_elements == 64 ? ~0llu : (1llu << num_elements) - 1;
desc->shader_userdata_offset = shader_userdata_index * 4;
desc->ce_offset = *ce_offset;
@@ -159,7 +159,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
void *ptr;
- if (!desc->list_dirty)
+ if (!desc->dirty_mask)
return true;
u_upload_alloc(sctx->b.uploader, 0, list_size, 256,
@@ -173,7 +173,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
- desc->list_dirty = false;
+ desc->dirty_mask = 0;
desc->pointer_dirty = true;
si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
return true;
@@ -216,6 +216,8 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
si_sampler_view_add_buffer(sctx, views->views[i]->texture);
}
+ views->desc.ce_ram_dirty = true;
+
if (!views->desc.buffer)
return;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, views->desc.buffer,
@@ -267,7 +269,7 @@ static void si_set_sampler_view(struct si_context *sctx,
views->desc.enabled_mask &= ~(1llu << slot);
}
- views->desc.list_dirty = true;
+ views->desc.dirty_mask |= 1llu << slot;
}
static bool is_compressed_colortex(struct r600_texture *rtex)
@@ -373,6 +375,8 @@ si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *imag
si_sampler_view_add_buffer(sctx, view->resource);
}
+ images->desc.ce_ram_dirty = true;
+
if (images->desc.buffer) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
images->desc.buffer,
@@ -390,7 +394,7 @@ si_disable_shader_image(struct si_images_info *images, unsigned slot)
memcpy(images->desc.list + slot*8, null_image_descriptor, 8*4);
images->desc.enabled_mask &= ~(1llu << slot);
- images->desc.list_dirty = true;
+ images->desc.dirty_mask |= 1llu << slot;
}
}
@@ -471,7 +475,7 @@ si_set_shader_images(struct pipe_context *pipe, unsigned shader,
}
images->desc.enabled_mask |= 1llu << slot;
- images->desc.list_dirty = true;
+ images->desc.dirty_mask |= 1llu << slot;
}
}
@@ -529,7 +533,7 @@ static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
continue;
memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
- desc->list_dirty = true;
+ desc->dirty_mask |= 1llu << slot;
}
}
@@ -576,6 +580,8 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
buffers->shader_usage, buffers->priority);
}
+ buffers->desc.ce_ram_dirty = true;
+
if (!buffers->desc.buffer)
return;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
@@ -772,7 +778,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
buffers->desc.enabled_mask &= ~(1llu << slot);
}
- buffers->desc.list_dirty = true;
+ buffers->desc.dirty_mask |= 1llu << slot;
}
/* SHADER BUFFERS */
@@ -819,9 +825,9 @@ static void si_set_shader_buffers(struct pipe_context *ctx, unsigned shader,
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, buf,
buffers->shader_usage, buffers->priority);
buffers->desc.enabled_mask |= 1llu << slot;
+ buffers->desc.dirty_mask |= 1llu << slot;
}
- buffers->desc.list_dirty = true;
}
/* RING BUFFERS */
@@ -916,7 +922,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
buffers->desc.enabled_mask &= ~(1llu << slot);
}
- buffers->desc.list_dirty = true;
+ buffers->desc.dirty_mask |= 1llu << slot;
}
/* STREAMOUT BUFFERS */
@@ -1014,6 +1020,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
NULL);
buffers->desc.enabled_mask &= ~(1llu << bufidx);
}
+ buffers->desc.dirty_mask |= 1llu << bufidx;
}
for (; i < old_num_targets; i++) {
bufidx = SI_SO_BUF_OFFSET + i;
@@ -1021,9 +1028,9 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
memset(buffers->desc.list + bufidx*4, 0, sizeof(uint32_t) * 4);
pipe_resource_reference(&buffers->buffers[bufidx], NULL);
buffers->desc.enabled_mask &= ~(1llu << bufidx);
+ buffers->desc.dirty_mask |= 1llu << bufidx;
}
- buffers->desc.list_dirty = true;
}
static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
@@ -1075,7 +1082,7 @@ static void si_reset_buffer_resources(struct si_context *sctx,
si_desc_reset_buffer_offset(&sctx->b.b,
buffers->desc.list + i*4,
old_va, buf);
- buffers->desc.list_dirty = true;
+ buffers->desc.dirty_mask |= 1llu << i;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource *)buf,
@@ -1137,7 +1144,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
if (buffers->buffers[i] == buf) {
si_desc_reset_buffer_offset(ctx, buffers->desc.list + i*4,
old_va, buf);
- buffers->desc.list_dirty = true;
+ buffers->desc.dirty_mask |= 1llu << i;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
rbuffer, buffers->shader_usage,
@@ -1182,7 +1189,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
views->desc.list +
i * 16 + 4,
old_va, buf);
- views->desc.list_dirty = true;
+ views->desc.dirty_mask |= 1llu << i;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
rbuffer, RADEON_USAGE_READ,
@@ -1203,7 +1210,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
si_desc_reset_buffer_offset(
ctx, images->desc.list + i * 8 + 4,
old_va, buf);
- images->desc.list_dirty = true;
+ images->desc.dirty_mask |= 1llu << i;
radeon_add_to_buffer_list(
&sctx->b, &sctx->b.gfx, rbuffer,
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index fbdc8ee..a0ae72e 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -191,8 +191,6 @@ struct si_descriptors {
unsigned element_dw_size;
/* The maximum number of descriptors. */
unsigned num_elements;
- /* Whether the list has been changed and should be re-uploaded. */
- bool list_dirty;
/* The buffer where the descriptors have been uploaded. */
struct r600_resource *buffer;
@@ -204,6 +202,13 @@ struct si_descriptors {
/* The i-th bit is set if that element is enabled (non-NULL resource). */
uint64_t enabled_mask;
+ /* The i-th bit is set if that element has changed and needs to be uploaded. */
+ uint64_t dirty_mask;
+
+ /* Whether the CE RAM is dirty and needs to be reinitialized entirely
+ * before we can do partial updates. */
+ bool ce_ram_dirty;
+
/* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
* array will be stored. */
unsigned shader_userdata_offset;
--
2.8.0
More information about the mesa-dev mailing list