[Mesa-dev] [PATCH 08/15] r600g: rework cache flushing

Marek Olšák maraeo at gmail.com
Mon Jan 30 12:23:11 PST 2012


This also significantly improves the RV670 flush (the same workaround is
applied to RS780 and RS880) by *always* setting CB1_DEST_BASE_ENA and also
DEST_BASE_0_ENA in the surface sync flags, which appears to magically fix
some tests. I am not entirely sure, but it's possible that RV670 flushing is
now fixed completely.
---
 src/gallium/drivers/r600/evergreen_hw_context.c |   80 ++---------
 src/gallium/drivers/r600/evergreen_state.c      |   22 +--
 src/gallium/drivers/r600/r600.h                 |    7 +-
 src/gallium/drivers/r600/r600_hw_context.c      |  173 ++++++++++-------------
 src/gallium/drivers/r600/r600_pipe.c            |    2 +
 src/gallium/drivers/r600/r600_pipe.h            |   10 ++
 src/gallium/drivers/r600/r600_state.c           |   16 +--
 src/gallium/drivers/r600/r600_state_common.c    |   71 +++++++++
 src/gallium/drivers/r600/r600d.h                |    1 +
 9 files changed, 192 insertions(+), 190 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index f7fcf37..118ba26 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -274,14 +274,14 @@ static const struct r600_reg evergreen_context_reg_list[] = {
 	{R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0},
 	{R_028820_PA_CL_NANINF_CNTL, 0, 0, 0},
 	{R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0, 0, 0},
-	{R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0, 0},
 	{R_028844_SQ_PGM_RESOURCES_PS, 0, 0, 0},
 	{R_028848_SQ_PGM_RESOURCES_2_PS, 0, 0, 0},
 	{R_02884C_SQ_PGM_EXPORTS_PS, 0, 0, 0},
-	{R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0, 0},
 	{R_028860_SQ_PGM_RESOURCES_VS, 0, 0, 0},
 	{R_028864_SQ_PGM_RESOURCES_2_VS, 0, 0, 0},
-	{R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0, 0},
 	{R_0288A8_SQ_PGM_RESOURCES_FS, 0, 0, 0},
 	{R_0288EC_SQ_LDS_ALLOC_PS, 0, 0, 0},
 	{R_028900_SQ_ESGS_RING_ITEMSIZE, 0, 0, 0},
@@ -294,10 +294,10 @@ static const struct r600_reg evergreen_context_reg_list[] = {
 	{R_028920_SQ_GS_VERT_ITEMSIZE_1, 0, 0, 0},
 	{R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0, 0},
 	{R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0, 0},
-	{R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
-	{R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
-	{R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
-	{R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0, 0},
+	{R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0, 0},
+	{R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0, 0},
+	{R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0, 0},
 	{R_028A00_PA_SU_POINT_SIZE, 0, 0, 0},
 	{R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0},
 	{R_028A08_PA_SU_LINE_CNTL, 0, 0, 0},
@@ -642,14 +642,14 @@ static const struct r600_reg cayman_context_reg_list[] = {
 	{R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0},
 	{R_028820_PA_CL_NANINF_CNTL, 0, 0, 0},
 	{R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0, 0, 0},
-	{R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0, 0},
 	{R_028844_SQ_PGM_RESOURCES_PS, 0, 0, 0},
 	{R_028848_SQ_PGM_RESOURCES_2_PS, 0, 0, 0},
 	{R_02884C_SQ_PGM_EXPORTS_PS, 0, 0, 0},
-	{R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0, 0},
 	{R_028860_SQ_PGM_RESOURCES_VS, 0, 0, 0},
 	{R_028864_SQ_PGM_RESOURCES_2_VS, 0, 0, 0},
-	{R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0, 0},
 	{R_0288A8_SQ_PGM_RESOURCES_FS, 0, 0, 0},
 	{CM_R_0288E8_SQ_LDS_ALLOC, 0, 0, 0},
 	{R_0288EC_SQ_LDS_ALLOC_PS, 0, 0, 0},
@@ -663,10 +663,10 @@ static const struct r600_reg cayman_context_reg_list[] = {
 	{R_028920_SQ_GS_VERT_ITEMSIZE_1, 0, 0, 0},
 	{R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0, 0},
 	{R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0, 0},
-	{R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
-	{R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
-	{R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
-	{R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0, 0},
+	{R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0, 0},
+	{R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0, 0},
+	{R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0, 0},
 	{R_028A00_PA_SU_POINT_SIZE, 0, 0, 0},
 	{R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0},
 	{R_028A08_PA_SU_LINE_CNTL, 0, 0, 0},
@@ -832,8 +832,8 @@ static const struct r600_reg cayman_context_reg_list[] = {
 static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride)
 {
 	struct r600_reg r600_shader_resource[] = {
-		{R_030000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF},
-		{R_030004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF},
+		{R_030000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, 0, 0},
+		{R_030004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, 0, 0},
 		{R_030008_RESOURCE0_WORD2, 0, 0, 0},
 		{R_03000C_RESOURCE0_WORD3, 0, 0, 0},
 		{R_030010_RESOURCE0_WORD4, 0, 0, 0},
@@ -1188,54 +1188,6 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 	cs->cdw += ndwords;
 }
 
-void evergreen_context_flush_dest_caches(struct r600_context *ctx)
-{
-	struct r600_resource *cb[12];
-	struct r600_resource *db;
-
-	if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
-		return;
-
-	/* find number of color buffer */
-	db = r600_context_reg_bo(ctx, R_028048_DB_Z_READ_BASE);
-	cb[0] = r600_context_reg_bo(ctx, R_028C60_CB_COLOR0_BASE);
-	cb[1] = r600_context_reg_bo(ctx, R_028C9C_CB_COLOR1_BASE);
-	cb[2] = r600_context_reg_bo(ctx, R_028CD8_CB_COLOR2_BASE);
-	cb[3] = r600_context_reg_bo(ctx, R_028D14_CB_COLOR3_BASE);
-	cb[4] = r600_context_reg_bo(ctx, R_028D50_CB_COLOR4_BASE);
-	cb[5] = r600_context_reg_bo(ctx, R_028D8C_CB_COLOR5_BASE);
-	cb[6] = r600_context_reg_bo(ctx, R_028DC8_CB_COLOR6_BASE);
-	cb[7] = r600_context_reg_bo(ctx, R_028E04_CB_COLOR7_BASE);
-	cb[8] = r600_context_reg_bo(ctx, R_028E40_CB_COLOR8_BASE);
-	cb[9] = r600_context_reg_bo(ctx, R_028E5C_CB_COLOR9_BASE);
-	cb[10] = r600_context_reg_bo(ctx, R_028E78_CB_COLOR10_BASE);
-	cb[11] = r600_context_reg_bo(ctx, R_028E94_CB_COLOR11_BASE);
-
-	/* flush color buffer */
-	for (int i = 0; i < 12; i++) {
-		if (cb[i]) {
-			unsigned flush;
-
-			if (i > 7) {
-				flush = (S_0085F0_CB8_DEST_BASE_ENA(1) << (i - 8)) |
-					S_0085F0_CB_ACTION_ENA(1);
-			} else {
-				flush = (S_0085F0_CB0_DEST_BASE_ENA(1) << i) |
-					S_0085F0_CB_ACTION_ENA(1);
-			}
-			r600_context_bo_flush(ctx, flush, 0, cb[i]);
-		}
-	}
-	if (db) {
-		r600_context_bo_flush(ctx,
-					S_0085F0_DB_ACTION_ENA(1) |
-					S_0085F0_DB_DEST_BASE_ENA(1),
-					0, db);
-	}
-
-	ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
-}
-
 void evergreen_flush_vgt_streamout(struct r600_context *ctx)
 {
 	struct radeon_winsys_cs *cs = ctx->cs;
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 3131f56..bd46072 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1176,6 +1176,8 @@ static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count,
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
 
+	if (count)
+		r600_inval_texture_cache(rctx);
 
 	memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count);
 	rctx->ps_samplers.n_samplers = count;
@@ -1190,6 +1192,9 @@ static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count,
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
 
+	if (count)
+		r600_inval_texture_cache(rctx);
+
 	for (int i = 0; i < count; i++) {
 		evergreen_context_pipe_state_set_vs_sampler(rctx, rstates[i], i);
 	}
@@ -1525,7 +1530,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 	if (rstate == NULL)
 		return;
 
-	evergreen_context_flush_dest_caches(rctx);
+	r600_flush_framebuffer(rctx, false);
 	rctx->num_dest_buffers = state->nr_cbufs;
 
 	/* unreference old buffer and reference new one */
@@ -1618,19 +1623,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 	}
 }
 
-static void evergreen_texture_barrier(struct pipe_context *ctx)
-{
-	struct r600_context *rctx = (struct r600_context *)ctx;
-
-	r600_context_flush_all(rctx, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_CB_ACTION_ENA(1) |
-			S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) |
-			S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) |
-			S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) |
-			S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1) |
-			S_0085F0_CB8_DEST_BASE_ENA(1) | S_0085F0_CB9_DEST_BASE_ENA(1) |
-			S_0085F0_CB10_DEST_BASE_ENA(1) | S_0085F0_CB11_DEST_BASE_ENA(1));
-}
-
 void evergreen_init_state_functions(struct r600_context *rctx)
 {
 	rctx->context.create_blend_state = evergreen_create_blend_state;
@@ -1671,7 +1663,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	rctx->context.set_viewport_state = evergreen_set_viewport_state;
 	rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
 	rctx->context.redefine_user_buffer = u_default_redefine_user_buffer;
-	rctx->context.texture_barrier = evergreen_texture_barrier;
+	rctx->context.texture_barrier = r600_texture_barrier;
 	rctx->context.create_stream_output_target = r600_create_so_target;
 	rctx->context.stream_output_target_destroy = r600_so_target_destroy;
 	rctx->context.set_stream_output_targets = r600_set_so_targets;
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 06eb9d1..b5d2a89 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -237,8 +237,10 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 			    int flag_wait);
 void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence,
                              unsigned offset, unsigned value);
-void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags);
-void r600_context_flush_dest_caches(struct r600_context *ctx);
+void r600_inval_shader_cache(struct r600_context *ctx);
+void r600_inval_texture_cache(struct r600_context *ctx);
+void r600_inval_vertex_cache(struct r600_context *ctx);
+void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now);
 
 void r600_context_streamout_begin(struct r600_context *ctx);
 void r600_context_streamout_end(struct r600_context *ctx);
@@ -249,7 +251,6 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60
 
 int evergreen_context_init(struct r600_context *ctx);
 void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
-void evergreen_context_flush_dest_caches(struct r600_context *ctx);
 void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
 void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
 void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 3399466..1cda187 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -416,10 +416,10 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0},
 	{R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0},
 	{R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0},
-	{R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
-	{R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
-	{R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
-	{R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0, 0},
+	{R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0, 0},
+	{R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0, 0},
+	{R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0, 0},
 	{R_02823C_CB_SHADER_MASK, 0, 0, 0},
 	{R_028238_CB_TARGET_MASK, 0, 0, 0},
 	{R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0},
@@ -587,11 +587,11 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{R_028638_SPI_VS_OUT_ID_9, 0, 0, 0},
 	{R_0286C4_SPI_VS_OUT_CONFIG, 0, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{R_028858_SQ_PGM_START_VS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_028858_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{R_028868_SQ_PGM_RESOURCES_VS, 0, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{R_028894_SQ_PGM_START_FS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_028894_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{R_0288A4_SQ_PGM_RESOURCES_FS, 0, 0, 0},
 	{R_0288D0_SQ_PGM_CF_OFFSET_VS, 0, 0, 0},
@@ -632,7 +632,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0, 0},
 	{R_0286D8_SPI_INPUT_Z, 0, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+	{R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{R_028850_SQ_PGM_RESOURCES_PS, 0, 0, 0},
 	{R_028854_SQ_PGM_EXPORTS_PS, 0, 0, 0},
@@ -675,8 +675,8 @@ int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsig
 static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride)
 {
 	struct r600_reg r600_shader_resource[] = {
-		{R_038000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF},
-		{R_038004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF},
+		{R_038000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, 0, 0},
+		{R_038004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, 0, 0},
 		{R_038008_RESOURCE0_WORD2, 0, 0, 0},
 		{R_03800C_RESOURCE0_WORD3, 0, 0, 0},
 		{R_038010_RESOURCE0_WORD4, 0, 0, 0},
@@ -971,20 +971,6 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 	}
 }
 
-/* Flushes all surfaces */
-void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags)
-{
-	struct radeon_winsys_cs *cs = ctx->cs;
-
-	r600_need_cs_space(ctx, 5, FALSE);
-
-	cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
-	cs->buf[cs->cdw++] = flush_flags;     /* CP_COHER_CNTL */
-	cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
-	cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
-	cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
-}
-
 void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 				unsigned flush_mask, struct r600_resource *bo)
 {
@@ -997,38 +983,14 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 		return;
 	}
 
-	if ((ctx->screen->family < CHIP_RV770) &&
-	    (G_0085F0_CB_ACTION_ENA(flush_flags) ||
-	     G_0085F0_DB_ACTION_ENA(flush_flags))) {
-		if (ctx->flags & R600_CONTEXT_CHECK_EVENT_FLUSH) {
-			/* the rv670 seems to fail fbo-generatemipmap unless we flush the CB1 dest base ena */
-			if ((bo->cs_buf->binding & BO_BOUND_TEXTURE) &&
-			    (flush_flags & S_0085F0_CB_ACTION_ENA(1))) {
-				if ((ctx->screen->family == CHIP_RV670) ||
-				    (ctx->screen->family == CHIP_RS780) ||
-				    (ctx->screen->family == CHIP_RS880)) {
-					cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
-					cs->buf[cs->cdw++] = S_0085F0_CB1_DEST_BASE_ENA(1);     /* CP_COHER_CNTL */
-					cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
-					cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
-					cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
-				}
-			}
-
-			cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
-			cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
-			ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
-		}
-	} else {
-		va = r600_resource_va(&ctx->screen->screen, (void *)bo);
-		cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
-		cs->buf[cs->cdw++] = flush_flags;
-		cs->buf[cs->cdw++] = (bo->buf->size + 255) >> 8;
-		cs->buf[cs->cdw++] = va >> 8;
-		cs->buf[cs->cdw++] = 0x0000000A;
-		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-		cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE);
-	}
+	va = r600_resource_va(&ctx->screen->screen, (void *)bo);
+	cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
+	cs->buf[cs->cdw++] = flush_flags;
+	cs->buf[cs->cdw++] = (bo->buf->size + 255) >> 8;
+	cs->buf[cs->cdw++] = va >> 8;
+	cs->buf[cs->cdw++] = 0x0000000A;
+	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE);
 	bo->cs_buf->last_flush = (bo->cs_buf->last_flush | flush_flags) & flush_mask;
 }
 
@@ -1387,43 +1349,6 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60
 	LIST_DELINIT(&block->list);
 }
 
-void r600_context_flush_dest_caches(struct r600_context *ctx)
-{
-	struct r600_resource *cb[8];
-	struct r600_resource *db;
-	int i;
-
-	if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
-		return;
-
-	db = r600_context_reg_bo(ctx, R_02800C_DB_DEPTH_BASE);
-	cb[0] = r600_context_reg_bo(ctx, R_028040_CB_COLOR0_BASE);
-	cb[1] = r600_context_reg_bo(ctx, R_028044_CB_COLOR1_BASE);
-	cb[2] = r600_context_reg_bo(ctx, R_028048_CB_COLOR2_BASE);
-	cb[3] = r600_context_reg_bo(ctx, R_02804C_CB_COLOR3_BASE);
-	cb[4] = r600_context_reg_bo(ctx, R_028050_CB_COLOR4_BASE);
-	cb[5] = r600_context_reg_bo(ctx, R_028054_CB_COLOR5_BASE);
-	cb[6] = r600_context_reg_bo(ctx, R_028058_CB_COLOR6_BASE);
-	cb[7] = r600_context_reg_bo(ctx, R_02805C_CB_COLOR7_BASE);
-
-	ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH;
-	/* flush the color buffers */
-	for (i = 0; i < 8; i++) {
-		if (!cb[i])
-			continue;
-
-		r600_context_bo_flush(ctx,
-					(S_0085F0_CB0_DEST_BASE_ENA(1) << i) |
-					S_0085F0_CB_ACTION_ENA(1),
-					0, cb[i]);
-	}
-	if (db) {
-		r600_context_bo_flush(ctx, S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1), 0, db);
-	}
-	ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
-	ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
-}
-
 void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 {
 	struct radeon_winsys_cs *cs = ctx->cs;
@@ -1483,6 +1408,65 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 	cs->cdw += ndwords;
 }
 
+void r600_inval_shader_cache(struct r600_context *ctx)
+{
+	ctx->atom_surface_sync.flush_flags |= S_0085F0_SH_ACTION_ENA(1);
+	r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
+}
+
+void r600_inval_texture_cache(struct r600_context *ctx)
+{
+	ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
+	r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
+}
+
+void r600_inval_vertex_cache(struct r600_context *ctx)
+{
+	if (ctx->family == CHIP_RV610 ||
+	    ctx->family == CHIP_RV620 ||
+	    ctx->family == CHIP_RS780 ||
+	    ctx->family == CHIP_RS880 ||
+	    ctx->family == CHIP_RV710 ||
+	    ctx->family == CHIP_CEDAR ||
+	    ctx->family == CHIP_PALM ||
+	    ctx->family == CHIP_SUMO ||
+	    ctx->family == CHIP_SUMO2 ||
+	    ctx->family == CHIP_CAICOS) {
+		/* The low-end GPUs don't have the vertex cache and use the texture cache instead. */
+		ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
+	} else {
+		ctx->atom_surface_sync.flush_flags |= S_0085F0_VC_ACTION_ENA(1);
+	}
+	r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
+}
+
+void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now)
+{
+	if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
+		return;
+
+	ctx->atom_surface_sync.flush_flags |=
+		r600_get_cb_flush_flags(ctx) |
+		(ctx->framebuffer.zsbuf ? S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) : 0);
+
+	if (flush_now) {
+		r600_emit_atom(ctx, &ctx->atom_surface_sync.atom);
+	} else {
+		r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
+	}
+
+	/* Also add a complete cache flush to work around broken flushing on R6xx. */
+	if (ctx->chip_class == R600) {
+		if (flush_now) {
+			r600_emit_atom(ctx, &ctx->atom_r6xx_flush_and_inv);
+		} else {
+			r600_atom_dirty(ctx, &ctx->atom_r6xx_flush_and_inv);
+		}
+	}
+
+	ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
+}
+
 void r600_context_flush(struct r600_context *ctx, unsigned flags)
 {
 	struct radeon_winsys_cs *cs = ctx->cs;
@@ -1504,10 +1488,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 		streamout_suspended = true;
 	}
 
-	if (ctx->screen->chip_class >= EVERGREEN)
-		evergreen_context_flush_dest_caches(ctx);
-	else
-		r600_context_flush_dest_caches(ctx);
+	r600_flush_framebuffer(ctx, true);
 
 	/* partial flush is needed to avoid lockups on some chips with user fences */
 	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index b927625..351ecfe 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -232,6 +232,8 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 	rctx->context.create_video_decoder = vl_create_decoder;
 	rctx->context.create_video_buffer = vl_video_buffer_create;
 
+	r600_init_common_atoms(rctx);
+
 	switch (rctx->chip_class) {
 	case R600:
 	case R700:
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 78b6d83..c327954 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -67,6 +67,11 @@ struct r600_atom {
 	struct list_head	head;
 };
 
+struct r600_atom_surface_sync {
+	struct r600_atom atom;
+	unsigned flush_flags; /* CP_COHER_CNTL */
+};
+
 enum r600_pipe_state_id {
 	R600_PIPE_STATE_BLEND = 0,
 	R600_PIPE_STATE_BLEND_COLOR,
@@ -272,6 +277,8 @@ struct r600_context {
 
 	/* States based on r600_state. */
 	struct list_head		dirty_states;
+	struct r600_atom_surface_sync	atom_surface_sync;
+	struct r600_atom		atom_r6xx_flush_and_inv;
 
 	/* Below are variables from the old r600_context.
 	 */
@@ -426,6 +433,9 @@ void r600_translate_index_buffer(struct r600_context *r600,
 				 unsigned count);
 
 /* r600_state_common.c */
+void r600_init_common_atoms(struct r600_context *rctx);
+unsigned r600_get_cb_flush_flags(struct r600_context *rctx);
+void r600_texture_barrier(struct pipe_context *ctx);
 void r600_set_index_buffer(struct pipe_context *ctx,
 			   const struct pipe_index_buffer *ib);
 void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index f2e0bfc..7b3d813 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1176,6 +1176,9 @@ static void r600_set_sampler_views(struct r600_context *rctx,
 	struct r600_pipe_sampler_view **rviews = (struct r600_pipe_sampler_view **)views;
 	unsigned i;
 
+	if (count)
+		r600_inval_texture_cache(rctx);
+
 	for (i = 0; i < count; i++) {
 		if (rviews[i]) {
 			if (((struct r600_resource_texture *)rviews[i]->base.texture)->depth)
@@ -1610,7 +1613,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 	if (rstate == NULL)
 		return;
 
-	r600_context_flush_dest_caches(rctx);
+	r600_flush_framebuffer(rctx, false);
 	rctx->num_dest_buffers = state->nr_cbufs;
 
 	/* unreference old buffer and reference new one */
@@ -1700,17 +1703,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 	}
 }
 
-static void r600_texture_barrier(struct pipe_context *ctx)
-{
-	struct r600_context *rctx = (struct r600_context *)ctx;
-
-	r600_context_flush_all(rctx, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_CB_ACTION_ENA(1) |
-			S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) |
-			S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) |
-			S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) |
-			S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1));
-}
-
 void r600_init_state_functions(struct r600_context *rctx)
 {
 	rctx->context.create_blend_state = r600_create_blend_state;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 0c06ad0..7dae397 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -34,6 +34,70 @@
 #include "r600_pipe.h"
 #include "r600d.h"
 
+static void r600_emit_surface_sync(struct r600_context *rctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = rctx->cs;
+	struct r600_atom_surface_sync *a = (struct r600_atom_surface_sync*)atom;
+
+	cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
+	cs->buf[cs->cdw++] = a->flush_flags;  /* CP_COHER_CNTL */
+	cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
+	cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
+	cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
+
+	a->flush_flags = 0;
+}
+
+static void r600_emit_r6xx_flush_and_inv(struct r600_context *rctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = rctx->cs;
+	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+	cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
+}
+
+static void r600_init_atom(struct r600_atom *atom,
+			   void (*emit)(struct r600_context *ctx, struct r600_atom *state),
+			   unsigned num_dw,
+			   enum r600_atom_flags flags)
+{
+	atom->emit = emit;
+	atom->num_dw = num_dw;
+	atom->flags = flags;
+}
+
+void r600_init_common_atoms(struct r600_context *rctx)
+{
+	r600_init_atom(&rctx->atom_surface_sync.atom,	r600_emit_surface_sync,		5, EMIT_EARLY);
+	r600_init_atom(&rctx->atom_r6xx_flush_and_inv,	r600_emit_r6xx_flush_and_inv,	2, EMIT_EARLY);
+}
+
+unsigned r600_get_cb_flush_flags(struct r600_context *rctx)
+{
+	unsigned flags = 0;
+
+	if (rctx->framebuffer.nr_cbufs) {
+		flags |= S_0085F0_CB_ACTION_ENA(1) |
+			 (((1 << rctx->framebuffer.nr_cbufs) - 1) << S_0085F0_CB0_DEST_BASE_ENA_SHIFT);
+	}
+
+	/* Workaround for broken flushing on some R6xx chipsets. */
+	if (rctx->screen->family == CHIP_RV670 ||
+	    rctx->screen->family == CHIP_RS780 ||
+	    rctx->screen->family == CHIP_RS880) {
+		flags |=  S_0085F0_CB1_DEST_BASE_ENA(1) |
+			  S_0085F0_DEST_BASE_0_ENA(1);
+	}
+	return flags;
+}
+
+void r600_texture_barrier(struct pipe_context *ctx)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+
+	rctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1) | r600_get_cb_flush_flags(rctx);
+	r600_atom_dirty(rctx, &rctx->atom_surface_sync.atom);
+}
+
 static bool r600_conv_pipe_prim(unsigned pprim, unsigned *prim)
 {
 	static const int prim_conv[] = {
@@ -226,6 +290,7 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
 
 	rctx->vertex_elements = v;
 	if (v) {
+		r600_inval_shader_cache(rctx);
 		u_vbuf_bind_vertex_elements(rctx->vbuf_mgr, state,
 						v->vmgr_elements);
 
@@ -333,6 +398,7 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
 	/* TODO delete old shader */
 	rctx->ps_shader = (struct r600_pipe_shader *)state;
 	if (state) {
+		r600_inval_shader_cache(rctx);
 		r600_context_pipe_state_set(rctx, &rctx->ps_shader->rstate);
 
 		rctx->cb_color_control &= C_028808_MULTIWRITE_ENABLE;
@@ -350,6 +416,7 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
 	/* TODO delete old shader */
 	rctx->vs_shader = (struct r600_pipe_shader *)state;
 	if (state) {
+		r600_inval_shader_cache(rctx);
 		r600_context_pipe_state_set(rctx, &rctx->vs_shader->rstate);
 	}
 	if (rctx->ps_shader && rctx->vs_shader) {
@@ -416,6 +483,8 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 		return;
 	}
 
+	r600_inval_shader_cache(rctx);
+
 	r600_upload_const_buffer(rctx, &rbuffer, &offset);
 	va_offset = r600_resource_va(ctx->screen, (void*)rbuffer);
 	va_offset += offset;
@@ -558,6 +627,8 @@ static void r600_vertex_buffer_update(struct r600_context *rctx)
 	struct pipe_vertex_buffer *vertex_buffer;
 	unsigned i, count, offset;
 
+	r600_inval_vertex_cache(rctx);
+
 	if (rctx->vertex_elements->vbuffer_need_offset) {
 		/* one resource per vertex elements */
 		count = rctx->vertex_elements->count;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index e2a526f..3c3238a 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -3270,6 +3270,7 @@
 #define   S_0085F0_SO3_DEST_BASE_ENA(x)                (((x) & 0x1) << 5)
 #define   G_0085F0_SO3_DEST_BASE_ENA(x)                (((x) >> 5) & 0x1)
 #define   C_0085F0_SO3_DEST_BASE_ENA                   0xFFFFFFDF
+#define   S_0085F0_CB0_DEST_BASE_ENA_SHIFT             6
 #define   S_0085F0_CB0_DEST_BASE_ENA(x)                (((x) & 0x1) << 6)
 #define   G_0085F0_CB0_DEST_BASE_ENA(x)                (((x) >> 6) & 0x1)
 #define   C_0085F0_CB0_DEST_BASE_ENA                   0xFFFFFFBF
-- 
1.7.5.4



More information about the mesa-dev mailing list