[Mesa-dev] [PATCH 3/5] radeonsi: remove the cache_flush atom

Marek Olšák maraeo at gmail.com
Fri Sep 9 15:05:53 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_compute.c    |  2 +-
 src/gallium/drivers/radeonsi/si_cp_dma.c     |  2 +-
 src/gallium/drivers/radeonsi/si_hw_context.c |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h       |  1 -
 src/gallium/drivers/radeonsi/si_state.c      |  1 -
 src/gallium/drivers/radeonsi/si_state.h      |  3 +--
 src/gallium/drivers/radeonsi/si_state_draw.c | 10 +++++-----
 7 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 5041761..f43c616 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -467,21 +467,21 @@ static void si_launch_grid(
 	if (info->indirect)
 		r600_context_add_resource_size(ctx, info->indirect);
 	/* TODO: add the scratch buffer */
 
 	si_need_cs_space(sctx);
 
 	if (!sctx->cs_shader_state.initialized)
 		si_initialize_compute(sctx);
 
 	if (sctx->b.flags)
-		si_emit_cache_flush(sctx, NULL);
+		si_emit_cache_flush(sctx);
 
 	if (!si_switch_compute_shader(sctx, program, &program->shader, info->pc))
 		return;
 
 	si_upload_compute_shader_descriptors(sctx);
 	si_emit_compute_shader_userdata(sctx);
 
 	if (si_is_atom_dirty(sctx, sctx->atoms.s.render_cond)) {
 		sctx->atoms.s.render_cond->emit(&sctx->b,
 		                                sctx->atoms.s.render_cond);
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 7d4edc0..08d3dfe 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -163,21 +163,21 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
 				  RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
 	if (src)
 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 					  (struct r600_resource*)src,
 					  RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
 
 	/* Flush the caches for the first copy only.
 	 * Also wait for the previous CP DMA operations.
 	 */
 	if (sctx->b.flags) {
-		si_emit_cache_flush(sctx, NULL);
+		si_emit_cache_flush(sctx);
 		*flags |= SI_CP_DMA_RAW_WAIT;
 	}
 
 	/* Do the synchronization after the last dma, so that all data
 	 * is written to memory.
 	 */
 	if (byte_count == remaining_size)
 		*flags |= R600_CP_DMA_SYNC;
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 24b0360..67e8352 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -108,21 +108,21 @@ void si_context_gfx_flush(void *context, unsigned flags,
 	r600_preflush_suspend_features(&ctx->b);
 
 	ctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
 			SI_CONTEXT_PS_PARTIAL_FLUSH;
 
 	/* DRM 3.1.0 doesn't flush TC for VI correctly. */
 	if (ctx->b.chip_class == VI && ctx->b.screen->info.drm_minor <= 1)
 		ctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2 |
 				SI_CONTEXT_INV_VMEM_L1;
 
-	si_emit_cache_flush(ctx, NULL);
+	si_emit_cache_flush(ctx);
 
 	if (ctx->trace_buf)
 		si_trace_emit(ctx);
 
 	if (ctx->is_debug) {
 		/* Save the IB for debug contexts. */
 		radeon_clear_saved_cs(&ctx->last_gfx);
 		radeon_save_cs(ws, cs, &ctx->last_gfx);
 		r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
 		r600_resource_reference(&ctx->trace_buf, NULL);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index a648d86..1080e72 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -222,21 +222,20 @@ struct si_context {
 	bool				compute_is_busy;
 
 	/* Atoms (direct states). */
 	union si_state_atoms		atoms;
 	unsigned			dirty_atoms; /* mask */
 	/* PM4 states (precomputed immutable states) */
 	union si_state			queued;
 	union si_state			emitted;
 
 	/* Atom declarations. */
-	struct r600_atom		cache_flush;
 	struct si_framebuffer		framebuffer;
 	struct si_sample_locs		msaa_sample_locs;
 	struct r600_atom		db_render_state;
 	struct r600_atom		msaa_config;
 	struct si_sample_mask		sample_mask;
 	struct r600_atom		cb_render_state;
 	struct si_blend_color		blend_color;
 	struct r600_atom		clip_regs;
 	struct si_clip_state		clip_state;
 	struct si_shader_data		shader_userdata;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 0d3de9a..1703e42 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3434,21 +3434,20 @@ static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
 static void si_init_config(struct si_context *sctx);
 
 void si_init_state_functions(struct si_context *sctx)
 {
 	si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
 	si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
 	si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
 	si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors);
 	si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports);
 
-	si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush);
 	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
 	si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
 	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
 	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
 	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
 	si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
 	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
 	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
 	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
 	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index d0e519c..e83b428 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -117,21 +117,20 @@ union si_state {
 		struct si_pm4_state		*vgt_shader_config;
 		struct si_pm4_state		*vs;
 		struct si_pm4_state		*ps;
 	} named;
 	struct si_pm4_state	*array[0];
 };
 
 union si_state_atoms {
 	struct {
 		/* The order matters. */
-		struct r600_atom *cache_flush;
 		struct r600_atom *render_cond;
 		struct r600_atom *streamout_begin;
 		struct r600_atom *streamout_enable; /* must be after streamout_begin */
 		struct r600_atom *framebuffer;
 		struct r600_atom *msaa_sample_locs;
 		struct r600_atom *db_render_state;
 		struct r600_atom *msaa_config;
 		struct r600_atom *sample_mask;
 		struct r600_atom *cb_render_state;
 		struct r600_atom *blend_color;
@@ -336,21 +335,21 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
 			      unsigned force_level);
 
 /* si_state_shader.c */
 bool si_update_shaders(struct si_context *sctx);
 void si_init_shader_functions(struct si_context *sctx);
 bool si_init_shader_cache(struct si_screen *sscreen);
 void si_destroy_shader_cache(struct si_screen *sscreen);
 void si_init_shader_selector_async(void *job, int thread_index);
 
 /* si_state_draw.c */
-void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
+void si_emit_cache_flush(struct si_context *sctx);
 void si_ce_pre_draw_synchronization(struct si_context *sctx);
 void si_ce_post_draw_synchronization(struct si_context *sctx);
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo);
 void si_trace_emit(struct si_context *sctx);
 
 
 static inline unsigned
 si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
 {
 	if (stencil)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index d7325ff..d3e6e1a 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -699,21 +699,21 @@ static void si_emit_draw_packets(struct si_context *sctx,
 			radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA);
 		} else {
 			radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
 			radeon_emit(cs, info->count);
 			radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
 				        S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
 		}
 	}
 }
 
-void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom)
+void si_emit_cache_flush(struct si_context *sctx)
 {
 	struct r600_common_context *rctx = &sctx->b;
 	struct radeon_winsys_cs *cs = rctx->gfx.cs;
 	uint32_t cp_coher_cntl = 0;
 
 	/* SI has a bug that it always flushes ICACHE and KCACHE if either
 	 * bit is set. An alternative way is to write SQC_CACHES, but that
 	 * doesn't seem to work reliably. Since the bug doesn't affect
 	 * correctness (it only does more work than necessary) and
 	 * the performance impact is likely negligible, there is no plan
@@ -1022,39 +1022,39 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
 		r600_resource(info->indirect)->TC_L2_dirty = false;
 	}
 
 	if (info->indirect_params &&
 	    r600_resource(info->indirect_params)->TC_L2_dirty) {
 		sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
 		r600_resource(info->indirect_params)->TC_L2_dirty = false;
 	}
 
-	/* Check flush flags. */
-	if (sctx->b.flags)
-		si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush);
-
 	/* Add buffer sizes for memory checking in need_cs_space. */
 	if (sctx->emit_scratch_reloc && sctx->scratch_buffer)
 		r600_context_add_resource_size(ctx, &sctx->scratch_buffer->b.b);
 	if (info->indirect)
 		r600_context_add_resource_size(ctx, info->indirect);
 
 	si_need_cs_space(sctx);
 
 	/* Since we've called r600_context_add_resource_size for vertex buffers,
 	 * this must be called after si_need_cs_space, because we must let
 	 * need_cs_space flush before we add buffers to the buffer list.
 	 */
 	if (!si_upload_vertex_buffer_descriptors(sctx))
 		return;
 
+	/* Flush caches prior to emitting states. */
+	if (sctx->b.flags)
+		si_emit_cache_flush(sctx);
+
 	/* Emit states. */
 	mask = sctx->dirty_atoms;
 	while (mask) {
 		struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
 
 		atom->emit(&sctx->b, atom);
 	}
 	sctx->dirty_atoms = 0;
 
 	si_pm4_emit_dirty(sctx);
-- 
2.7.4



More information about the mesa-dev mailing list