[Mesa-dev] [PATCH 16/17] radeonsi: atomize the scratch buffer state
Marek Olšák
maraeo at gmail.com
Mon Jan 30 11:51:09 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
The update frequency of the scratch buffer state is very low.
Difference (v2): The scratch buffer size is only accounted for when a new
scratch buffer is allocated and when a new IB is started.
---
src/gallium/drivers/radeonsi/si_cp_dma.c | 3 ++-
src/gallium/drivers/radeonsi/si_hw_context.c | 5 ++++-
src/gallium/drivers/radeonsi/si_pipe.h | 2 +-
src/gallium/drivers/radeonsi/si_state.h | 1 +
src/gallium/drivers/radeonsi/si_state_draw.c | 24 ------------------------
src/gallium/drivers/radeonsi/si_state_shaders.c | 24 ++++++++++++++++++++++--
6 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index b398256..e198765 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -276,21 +276,22 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size,
* idle at this point.
*/
if (!sctx->scratch_buffer ||
sctx->scratch_buffer->b.b.width0 < scratch_size) {
r600_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = (struct r600_resource*)
pipe_buffer_create(&sctx->screen->b.b, 0,
PIPE_USAGE_DEFAULT, scratch_size);
if (!sctx->scratch_buffer)
return;
- sctx->emit_scratch_reloc = true;
+
+ si_mark_atom_dirty(sctx, &sctx->scratch_state);
}
si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b,
&sctx->scratch_buffer->b.b, size, size, user_flags,
is_first, &dma_flags);
va = sctx->scratch_buffer->gpu_address;
si_emit_cp_dma(sctx, va, va + CP_DMA_ALIGNMENT, size, dma_flags,
R600_COHERENCY_SHADER);
}
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index e5da730..c504ac5 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -228,34 +228,37 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
si_all_descriptors_begin_new_cs(ctx);
ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
si_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
si_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
+ si_mark_atom_dirty(ctx, &ctx->scratch_state);
+ r600_context_add_resource_size(&ctx->b.b,
+ &ctx->scratch_buffer->b.b);
+
r600_postflush_resume_features(&ctx->b);
assert(!ctx->b.gfx.cs->prev_dw);
ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
/* Invalidate various draw states so that they are emitted before
* the first draw call. */
si_invalidate_draw_sh_constants(ctx);
ctx->last_index_size = -1;
ctx->last_primitive_restart_en = -1;
ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
ctx->last_gs_out_prim = -1;
ctx->last_prim = -1;
ctx->last_multi_vgt_param = -1;
ctx->last_rast_prim = -1;
ctx->last_sc_line_stipple = ~0;
- ctx->emit_scratch_reloc = true;
ctx->last_ls = NULL;
ctx->last_tcs = NULL;
ctx->last_tes_sh_base = -1;
ctx->last_num_tcs_input_cp = -1;
ctx->cs_shader_state.initialized = false;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 724d89e..d17d55a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -345,22 +345,22 @@ struct si_context {
int last_restart_index;
int last_gs_out_prim;
int last_prim;
int last_multi_vgt_param;
int last_rast_prim;
unsigned last_sc_line_stipple;
int current_rast_prim; /* primitive type after TES, GS */
bool gs_tri_strip_adj_fix;
/* Scratch buffer */
+ struct r600_atom scratch_state;
struct r600_resource *scratch_buffer;
- bool emit_scratch_reloc;
unsigned scratch_waves;
unsigned spi_tmpring_size;
struct r600_resource *compute_scratch_buffer;
/* Emitted derived tessellation state. */
struct si_shader *last_ls; /* local shader (VS) */
struct si_shader_selector *last_tcs;
int last_num_tcs_input_cp;
int last_tes_sh_base;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 19880c5..6bb0673 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -145,20 +145,21 @@ union si_state_atoms {
struct r600_atom *sample_mask;
struct r600_atom *cb_render_state;
struct r600_atom *blend_color;
struct r600_atom *clip_regs;
struct r600_atom *clip_state;
struct r600_atom *shader_userdata;
struct r600_atom *scissors;
struct r600_atom *viewports;
struct r600_atom *stencil_ref;
struct r600_atom *spi_map;
+ struct r600_atom *scratch_state;
} s;
struct r600_atom *array[0];
};
#define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct r600_atom*))
struct si_shader_data {
struct r600_atom atom;
uint32_t sh_base[SI_NUM_SHADERS];
};
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 1ce8ac8..e4a6b7f 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -452,39 +452,20 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
G_028AA8_SWITCH_ON_EOI(ia_multi_vgt_param) &&
(info->indirect ||
(info->instance_count > 1 &&
si_num_prims_for_vertices(info) <= 1)))
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
}
return ia_multi_vgt_param;
}
-static void si_emit_scratch_reloc(struct si_context *sctx)
-{
- struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-
- if (!sctx->emit_scratch_reloc)
- return;
-
- radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
- sctx->spi_tmpring_size);
-
- if (sctx->scratch_buffer) {
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- sctx->scratch_buffer, RADEON_USAGE_READWRITE,
- RADEON_PRIO_SCRATCH_BUFFER);
-
- }
- sctx->emit_scratch_reloc = false;
-}
-
/* rast_prim is the primitive type after GS. */
static void si_emit_rasterizer_prim_state(struct si_context *sctx)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned rast_prim = sctx->current_rast_prim;
struct si_state_rasterizer *rs = sctx->emitted.named.rasterizer;
/* Skip this if not rendering lines. */
if (rast_prim != PIPE_PRIM_LINES &&
rast_prim != PIPE_PRIM_LINE_LOOP &&
@@ -1120,22 +1101,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
r600_resource(info->indirect)->TC_L2_dirty = false;
}
if (info->indirect_params &&
r600_resource(info->indirect_params)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
r600_resource(info->indirect_params)->TC_L2_dirty = false;
}
/* Add buffer sizes for memory checking in need_cs_space. */
- if (sctx->emit_scratch_reloc && sctx->scratch_buffer)
- r600_context_add_resource_size(ctx, &sctx->scratch_buffer->b.b);
if (info->indirect)
r600_context_add_resource_size(ctx, info->indirect);
si_need_cs_space(sctx);
/* Since we've called r600_context_add_resource_size for vertex buffers,
* this must be called after si_need_cs_space, because we must let
* need_cs_space flush before we add buffers to the buffer list.
*/
if (!si_upload_vertex_buffer_descriptors(sctx))
@@ -1161,28 +1140,25 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
struct si_pm4_state *state = sctx->queued.array[i];
if (!state || sctx->emitted.array[i] == state)
continue;
si_pm4_emit(sctx, state);
sctx->emitted.array[i] = state;
}
sctx->dirty_states = 0;
- si_emit_scratch_reloc(sctx);
si_emit_rasterizer_prim_state(sctx);
si_emit_draw_registers(sctx, info);
si_ce_pre_draw_synchronization(sctx);
-
si_emit_draw_packets(sctx, info, &ib);
-
si_ce_post_draw_synchronization(sctx);
if (sctx->trace_buf)
si_trace_emit(sctx);
/* Workaround for a VGT hang when streamout is enabled.
* It must be done after drawing. */
if ((sctx->b.family == CHIP_HAWAII ||
sctx->b.family == CHIP_TONGA ||
sctx->b.family == CHIP_FIJI) &&
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ebd2435..bde02f5 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2193,21 +2193,24 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
if (scratch_needed_size > 0) {
if (scratch_needed_size > current_scratch_buffer_size) {
/* Create a bigger scratch buffer */
r600_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = (struct r600_resource*)
pipe_buffer_create(&sctx->screen->b.b, 0,
PIPE_USAGE_DEFAULT, scratch_needed_size);
if (!sctx->scratch_buffer)
return false;
- sctx->emit_scratch_reloc = true;
+
+ si_mark_atom_dirty(sctx, &sctx->scratch_state);
+ r600_context_add_resource_size(&sctx->b.b,
+ &sctx->scratch_buffer->b.b);
}
/* Update the shaders, so they are using the latest scratch. The
* scratch buffer may have been changed since these shaders were
* last used, so we still need to try to update them, even if
* they require scratch buffers smaller than the current size.
*/
r = si_update_scratch_buffer(sctx, sctx->ps_shader.current);
if (r < 0)
return false;
@@ -2252,21 +2255,21 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
}
/* The LLVM shader backend should be reporting aligned scratch_sizes. */
assert((scratch_needed_size & ~0x3FF) == scratch_needed_size &&
"scratch size should already be aligned correctly.");
spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
if (spi_tmpring_size != sctx->spi_tmpring_size) {
sctx->spi_tmpring_size = spi_tmpring_size;
- sctx->emit_scratch_reloc = true;
+ si_mark_atom_dirty(sctx, &sctx->scratch_state);
}
return true;
}
static void si_init_tess_factor_ring(struct si_context *sctx)
{
bool double_offchip_buffers = sctx->b.chip_class >= CIK;
unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
unsigned max_offchip_buffers = max_offchip_buffers_per_se *
sctx->screen->b.info.max_se;
@@ -2581,23 +2584,40 @@ bool si_update_shaders(struct si_context *sctx)
return false;
}
if (sctx->b.chip_class >= CIK)
si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
sctx->do_update_shaders = false;
return true;
}
+static void si_emit_scratch_state(struct si_context *sctx,
+ struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+
+ radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
+ sctx->spi_tmpring_size);
+
+ if (sctx->scratch_buffer) {
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ sctx->scratch_buffer, RADEON_USAGE_READWRITE,
+ RADEON_PRIO_SCRATCH_BUFFER);
+ }
+}
+
void si_init_shader_functions(struct si_context *sctx)
{
si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
+ si_init_atom(sctx, &sctx->scratch_state, &sctx->atoms.s.scratch_state,
+ si_emit_scratch_state);
sctx->b.b.create_vs_state = si_create_shader_selector;
sctx->b.b.create_tcs_state = si_create_shader_selector;
sctx->b.b.create_tes_state = si_create_shader_selector;
sctx->b.b.create_gs_state = si_create_shader_selector;
sctx->b.b.create_fs_state = si_create_shader_selector;
sctx->b.b.bind_vs_state = si_bind_vs_shader;
sctx->b.b.bind_tcs_state = si_bind_tcs_shader;
sctx->b.b.bind_tes_state = si_bind_tes_shader;
--
2.7.4
More information about the mesa-dev mailing list