[Mesa-dev] [PATCH 08/11] radeonsi: use a global dirty mask for shader pointers
Marek Olšák
maraeo at gmail.com
Tue Jan 17 22:47:58 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
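Track which shader descriptor pointers need to be re-emitted in a single
per-context bitmask (shader_pointers_dirty) instead of a pointer_dirty
flag stored in each si_descriptors. Only vertex buffers use a separate
bool flag (vertex_buffer_pointer_dirty).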
---
src/gallium/drivers/radeonsi/si_descriptors.c | 85 +++++++++++++++------------
src/gallium/drivers/radeonsi/si_pipe.h | 2 +
src/gallium/drivers/radeonsi/si_state.h | 2 -
src/gallium/drivers/radeonsi/si_state_draw.c | 2 +-
4 files changed, 51 insertions(+), 40 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index a535fa0..deb6df9 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -239,21 +239,20 @@ static bool si_upload_descriptors(struct si_context *sctx,
(struct pipe_resource**)&desc->buffer, &ptr);
if (!desc->buffer)
return false; /* skip the draw call */
util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
desc->gpu_list = ptr;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
}
- desc->pointer_dirty = true;
desc->dirty_mask = 0;
if (atom)
si_mark_atom_dirty(sctx, atom);
return true;
}
static void
si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
@@ -1028,23 +1027,23 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
(struct r600_resource*)vb->buffer,
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
bound[ve->vertex_buffer_index] = true;
}
}
/* Don't flush the const cache. It would have a very negative effect
* on performance (confirmed by testing). New descriptors are always
* uploaded to a fresh new buffer, so I don't think flushing the const
* cache is needed. */
- desc->pointer_dirty = true;
si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
sctx->vertex_buffers_dirty = false;
+ sctx->vertex_buffer_pointer_dirty = true;
return true;
}
/* CONSTANT BUFFERS */
static unsigned
si_const_buffer_descriptors_idx(unsigned shader)
{
return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
@@ -1728,40 +1727,35 @@ void si_update_all_texture_descriptors(struct si_context *sctx)
si_update_compressed_tex_shader_mask(sctx, shader);
}
}
/* SHADER USER DATA */
static void si_mark_shader_pointers_dirty(struct si_context *sctx,
unsigned shader)
{
- struct si_descriptors *descs =
- &sctx->descriptors[SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS];
-
- for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
- descs->pointer_dirty = true;
+ sctx->shader_pointers_dirty |=
+ u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
+ SI_NUM_SHADER_DESCS);
if (shader == PIPE_SHADER_VERTEX)
- sctx->vertex_buffers.pointer_dirty = true;
+ sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
}
static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
{
- int i;
-
- for (i = 0; i < SI_NUM_SHADERS; i++) {
- si_mark_shader_pointers_dirty(sctx, i);
- }
- sctx->descriptors[SI_DESCS_RW_BUFFERS].pointer_dirty = true;
+ sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
+ sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
+ si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
}
/* Set a base register address for user data constants in the given shader.
* This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
*/
static void si_set_user_data_base(struct si_context *sctx,
unsigned shader, uint32_t new_base)
{
uint32_t *base = &sctx->shader_userdata.sh_base[shader];
@@ -1800,84 +1794,95 @@ void si_shader_change_notify(struct si_context *sctx)
else
si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
R_00B130_SPI_SHADER_USER_DATA_VS_0);
} else {
si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
}
}
static void si_emit_shader_pointer(struct si_context *sctx,
struct si_descriptors *desc,
- unsigned sh_base, bool keep_dirty)
+ unsigned sh_base)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
uint64_t va;
- if (!desc->pointer_dirty || !desc->buffer)
- return;
+ assert(desc->buffer);
va = desc->buffer->gpu_address +
desc->buffer_offset;
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
- desc->pointer_dirty = keep_dirty;
}
void si_emit_graphics_shader_userdata(struct si_context *sctx,
struct r600_atom *atom)
{
- unsigned shader;
+ unsigned mask;
uint32_t *sh_base = sctx->shader_userdata.sh_base;
struct si_descriptors *descs;
descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
- if (descs->pointer_dirty) {
+ if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
si_emit_shader_pointer(sctx, descs,
- R_00B030_SPI_SHADER_USER_DATA_PS_0, true);
+ R_00B030_SPI_SHADER_USER_DATA_PS_0);
si_emit_shader_pointer(sctx, descs,
- R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
+ R_00B130_SPI_SHADER_USER_DATA_VS_0);
si_emit_shader_pointer(sctx, descs,
- R_00B230_SPI_SHADER_USER_DATA_GS_0, true);
+ R_00B230_SPI_SHADER_USER_DATA_GS_0);
si_emit_shader_pointer(sctx, descs,
- R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
+ R_00B330_SPI_SHADER_USER_DATA_ES_0);
si_emit_shader_pointer(sctx, descs,
- R_00B430_SPI_SHADER_USER_DATA_HS_0, true);
- descs->pointer_dirty = false;
+ R_00B430_SPI_SHADER_USER_DATA_HS_0);
}
- descs = &sctx->descriptors[SI_DESCS_FIRST_SHADER];
+ mask = sctx->shader_pointers_dirty &
+ u_bit_consecutive(SI_DESCS_FIRST_SHADER,
+ SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER);
- for (shader = 0; shader < SI_NUM_GRAPHICS_SHADERS; shader++) {
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+ unsigned shader = (i - SI_DESCS_FIRST_SHADER) / SI_NUM_SHADER_DESCS;
unsigned base = sh_base[shader];
- unsigned i;
- if (!base)
- continue;
+ if (base)
+ si_emit_shader_pointer(sctx, descs + i, base);
+ }
+ sctx->shader_pointers_dirty &=
+ ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
- for (i = 0; i < SI_NUM_SHADER_DESCS; i++, descs++)
- si_emit_shader_pointer(sctx, descs, base, false);
+ if (sctx->vertex_buffer_pointer_dirty) {
+ si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
+ sh_base[PIPE_SHADER_VERTEX]);
+ sctx->vertex_buffer_pointer_dirty = false;
}
- si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
}
void si_emit_compute_shader_userdata(struct si_context *sctx)
{
unsigned base = R_00B900_COMPUTE_USER_DATA_0;
- struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_FIRST_COMPUTE];
+ struct si_descriptors *descs = sctx->descriptors;
+ unsigned compute_mask =
+ u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_SHADER_DESCS);
+ unsigned mask = sctx->shader_pointers_dirty & compute_mask;
- for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
- si_emit_shader_pointer(sctx, descs, base, false);
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+
+ si_emit_shader_pointer(sctx, descs + i, base);
+ }
+ sctx->shader_pointers_dirty &= ~compute_mask;
}
/* INIT/DEINIT/UPLOAD */
void si_init_all_descriptors(struct si_context *sctx)
{
int i;
unsigned ce_offset = 0;
for (i = 0; i < SI_NUM_SHADERS; i++) {
@@ -1932,20 +1937,23 @@ void si_init_all_descriptors(struct si_context *sctx)
si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, R_00B430_SPI_SHADER_USER_DATA_HS_0);
si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0);
si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
}
bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
{
const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
unsigned dirty = sctx->descriptors_dirty & mask;
+ /* Assume nothing will go wrong: */
+ sctx->shader_pointers_dirty |= dirty;
+
while (dirty) {
unsigned i = u_bit_scan(&dirty);
if (!si_upload_descriptors(sctx, &sctx->descriptors[i],
&sctx->shader_userdata.atom))
return false;
}
sctx->descriptors_dirty &= ~mask;
return true;
@@ -1953,20 +1961,23 @@ bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
bool si_upload_compute_shader_descriptors(struct si_context *sctx)
{
/* Does not update rw_buffers as that is not needed for compute shaders
* and the input buffer is using the same SGPR's anyway.
*/
const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE,
SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
unsigned dirty = sctx->descriptors_dirty & mask;
+ /* Assume nothing will go wrong: */
+ sctx->shader_pointers_dirty |= dirty;
+
while (dirty) {
unsigned i = u_bit_scan(&dirty);
if (!si_upload_descriptors(sctx, &sctx->descriptors[i], NULL))
return false;
}
sctx->descriptors_dirty &= ~mask;
return true;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e7d071d..421e2a4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -261,40 +261,42 @@ struct si_context {
/* shader information */
struct si_vertex_element *vertex_elements;
unsigned sprite_coord_enable;
bool flatshade;
bool do_update_shaders;
/* shader descriptors */
struct si_descriptors vertex_buffers;
struct si_descriptors descriptors[SI_NUM_DESCS];
unsigned descriptors_dirty;
+ unsigned shader_pointers_dirty;
unsigned compressed_tex_shader_mask;
struct si_buffer_resources rw_buffers;
struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
struct si_buffer_resources shader_buffers[SI_NUM_SHADERS];
struct si_textures_info samplers[SI_NUM_SHADERS];
struct si_images_info images[SI_NUM_SHADERS];
/* other shader resources */
struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
struct pipe_resource *esgs_ring;
struct pipe_resource *gsvs_ring;
struct pipe_resource *tf_ring;
struct pipe_resource *tess_offchip_ring;
union pipe_color_union *border_color_table; /* in CPU memory, any endian */
struct r600_resource *border_color_buffer;
union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */
unsigned border_color_count;
/* Vertex and index buffers. */
bool vertex_buffers_dirty;
+ bool vertex_buffer_pointer_dirty;
struct pipe_index_buffer index_buffer;
struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS];
/* MSAA config state. */
int ps_iter_samples;
bool smoothing_enabled;
/* DB render state. */
bool dbcb_depth_copy_enabled;
bool dbcb_stencil_copy_enabled;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index edc5b93..34a0f57 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -230,22 +230,20 @@ struct si_descriptors {
/* elements of the list that are changed and need to be uploaded */
unsigned dirty_mask;
/* Whether the CE ram is dirty and needs to be reinitialized entirely
* before we can do partial updates. */
bool ce_ram_dirty;
/* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
* array will be stored. */
unsigned shader_userdata_offset;
- /* Whether the pointer should be re-emitted. */
- bool pointer_dirty;
};
struct si_sampler_views {
struct pipe_sampler_view *views[SI_NUM_SAMPLERS];
struct si_sampler_state *sampler_states[SI_NUM_SAMPLERS];
/* The i-th bit is set if that element is enabled (non-NULL resource). */
unsigned enabled_mask;
};
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 96a0e84..837c025 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1139,21 +1139,21 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
if (si_pm4_state_changed(sctx, es))
cik_prefetch_shader_async(sctx, sctx->queued.named.es);
if (si_pm4_state_changed(sctx, gs))
cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
if (si_pm4_state_changed(sctx, vs))
cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
/* Vertex buffer descriptors are uploaded uncached, so prefetch
* them right after the VS binary. */
- if (sctx->vertex_buffers.pointer_dirty) {
+ if (sctx->vertex_buffer_pointer_dirty) {
cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
sctx->vertex_buffers.buffer_offset,
sctx->vertex_elements->count * 16);
}
if (si_pm4_state_changed(sctx, ps))
cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
}
/* Emit states. */
mask = sctx->dirty_atoms;
--
2.7.4
More information about the mesa-dev mailing list