Mesa (master): r600g: don't flush the dest caches on every draw

Dave Airlie airlied at kemper.freedesktop.org
Tue Apr 19 23:16:27 UTC 2011


Module: Mesa
Branch: master
Commit: 6067a2a67f9a7aab2aee051469bea8af03747a95
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6067a2a67f9a7aab2aee051469bea8af03747a95

Author: Fredrik Höglund <fredrik at kde.org>
Date:   Wed Apr 20 00:21:42 2011 +0200

r600g: don't flush the dest caches on every draw

Keep track of when the destination caches are dirty, and flush them only
when the framebuffer state is set or the context is flushed, instead of
after every draw.

Signed-off-by: Dave Airlie <airlied at redhat.com>

---

 src/gallium/drivers/r600/evergreen_state.c         |   19 +++-
 src/gallium/drivers/r600/evergreend.h              |    1 +
 src/gallium/drivers/r600/r600.h                    |    6 +
 src/gallium/drivers/r600/r600_pipe.h               |    2 -
 src/gallium/drivers/r600/r600_state.c              |   12 ++-
 src/gallium/winsys/r600/drm/evergreen_hw_context.c |   86 +++++++++-----
 src/gallium/winsys/r600/drm/r600_hw_context.c      |  122 ++++++++++++++------
 7 files changed, 176 insertions(+), 72 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index febc613..a972f82 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -822,6 +822,9 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 	if (rstate == NULL)
 		return;
 
+	evergreen_context_flush_dest_caches(&rctx->ctx);
+	rctx->ctx.num_dest_buffers = state->nr_cbufs;
+
 	/* unreference old buffer and reference new one */
 	rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
 
@@ -833,6 +836,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 	}
 	if (state->zsbuf) {
 		evergreen_db(rctx, rstate, state);
+		rctx->ctx.num_dest_buffers++;
 	}
 
 	target_mask = 0x00000000;
@@ -894,6 +898,19 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 	}
 }
 
+static void evergreen_texture_barrier(struct pipe_context *ctx)
+{
+	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+
+	r600_context_flush_all(&rctx->ctx, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_CB_ACTION_ENA(1) |
+			S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) |
+			S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) |
+			S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) |
+			S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1) |
+			S_0085F0_CB8_DEST_BASE_ENA(1) | S_0085F0_CB9_DEST_BASE_ENA(1) |
+			S_0085F0_CB10_DEST_BASE_ENA(1) | S_0085F0_CB11_DEST_BASE_ENA(1));
+}
+
 void evergreen_init_state_functions(struct r600_pipe_context *rctx)
 {
 	rctx->context.create_blend_state = evergreen_create_blend_state;
@@ -934,7 +951,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx)
 	rctx->context.set_viewport_state = evergreen_set_viewport_state;
 	rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
 	rctx->context.redefine_user_buffer = u_default_redefine_user_buffer;
-	rctx->context.texture_barrier = r600_texture_barrier;
+	rctx->context.texture_barrier = evergreen_texture_barrier;
 }
 
 void evergreen_init_config(struct r600_pipe_context *rctx)
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 8489c29..de445b8 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -43,6 +43,7 @@
 #define EVERGREEN_CTL_CONST_OFFSET                  0x0003CFF0
 #define EVERGREEN_CTL_CONST_END                     0x0003E200
 
+#define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
 #define EVENT_TYPE_ZPASS_DONE                  0x15
 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT   0x16
 #define		EVENT_TYPE(x)                           ((x) << 0)
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 41666f2..0b0df9d 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -233,6 +233,8 @@ struct r600_query {
 #define R600_QUERY_STATE_ENDED		(1 << 1)
 #define R600_QUERY_STATE_SUSPENDED	(1 << 2)
 
+#define R600_CONTEXT_DRAW_PENDING	(1 << 0)
+#define R600_CONTEXT_DST_CACHES_DIRTY	(1 << 1)
 
 struct r600_context {
 	struct radeon		*radeon;
@@ -255,6 +257,8 @@ struct r600_context {
 	unsigned		num_query_running;
 	struct list_head	fenced_bo;
 	unsigned                max_db; /* for OQ */
+	unsigned                num_dest_buffers;
+	unsigned		flags;
 	boolean                 predicate_drawing;
 };
 
@@ -293,9 +297,11 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence,
                              unsigned offset, unsigned value);
 void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags);
+void r600_context_flush_dest_caches(struct r600_context *ctx);
 
 int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon);
 void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
+void evergreen_context_flush_dest_caches(struct r600_context *ctx);
 void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
 void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
 void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 5b26d1f..88aff0e 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -298,8 +298,6 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 			      struct pipe_resource *buffer);
 void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
 
-void r600_texture_barrier(struct pipe_context *ctx);
-
 /*
  * common helpers
  */
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index e9814da..ac2e898 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -857,6 +857,9 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 	if (rstate == NULL)
 		return;
 
+	r600_context_flush_dest_caches(&rctx->ctx);
+	rctx->ctx.num_dest_buffers = state->nr_cbufs;
+
 	/* unreference old buffer and reference new one */
 	rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
 
@@ -868,6 +871,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 	}
 	if (state->zsbuf) {
 		r600_db(rctx, rstate, state);
+		rctx->ctx.num_dest_buffers++;
 	}
 
 	target_mask = 0x00000000;
@@ -947,11 +951,15 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 	}
 }
 
-void r600_texture_barrier(struct pipe_context *ctx)
+static void r600_texture_barrier(struct pipe_context *ctx)
 {
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 
-	r600_context_flush_all(&rctx->ctx, S_0085F0_TC_ACTION_ENA(1));
+	r600_context_flush_all(&rctx->ctx, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_CB_ACTION_ENA(1) |
+			S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) |
+			S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) |
+			S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) |
+			S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1));
 }
 
 void r600_init_state_functions(struct r600_pipe_context *rctx)
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index b16ba07..0a5b1a0 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -680,6 +680,17 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context
 	r600_context_dirty_block(ctx, block, dirty, 2);
 }
 
+static inline void evergreen_context_ps_partial_flush(struct r600_context *ctx)
+{
+	if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING))
+		return;
+
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
+
+	ctx->flags &= ~R600_CONTEXT_DRAW_PENDING;
+}
+
 static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset, unsigned id)
 {
 	unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) * 0x100 + 0x40000 + id * 0x1C;
@@ -698,6 +709,7 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c
 	if (state->nregs <= 3) {
 		return;
 	}
+
 	dirty = block->status & R600_BLOCK_STATUS_DIRTY;
 	if (block->reg[0] != id) {
 		block->reg[0] = id;
@@ -711,6 +723,12 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c
 		}
 	}
 
+	/* We have to flush the shaders before we change the border color
+	 * registers, or previous draw commands that haven't completed yet
+	 * will end up using the new border color. */
+	if (dirty & R600_BLOCK_STATUS_DIRTY)
+		evergreen_context_ps_partial_flush(ctx);
+
 	r600_context_dirty_block(ctx, block, dirty, 4);
 }
 
@@ -735,42 +753,18 @@ void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struc
 
 void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 {
-	struct r600_bo *cb[12];
-	struct r600_bo *db;
-	unsigned ndwords = 9, flush;
+	unsigned ndwords = 7;
 	struct r600_block *dirty_block = NULL;
 	struct r600_block *next_block;
 
 	if (draw->indices) {
-		ndwords = 13;
+		ndwords = 11;
 		/* make sure there is enough relocation space before scheduling draw */
 		if (ctx->creloc >= (ctx->nreloc - 1)) {
 			r600_context_flush(ctx);
 		}
 	}
 
-	/* find number of color buffer */
-	db = r600_context_reg_bo(ctx, R_028048_DB_Z_READ_BASE);
-	cb[0] = r600_context_reg_bo(ctx, R_028C60_CB_COLOR0_BASE);
-	cb[1] = r600_context_reg_bo(ctx, R_028C9C_CB_COLOR1_BASE);
-	cb[2] = r600_context_reg_bo(ctx, R_028CD8_CB_COLOR2_BASE);
-	cb[3] = r600_context_reg_bo(ctx, R_028D14_CB_COLOR3_BASE);
-	cb[4] = r600_context_reg_bo(ctx, R_028D50_CB_COLOR4_BASE);
-	cb[5] = r600_context_reg_bo(ctx, R_028D8C_CB_COLOR5_BASE);
-	cb[6] = r600_context_reg_bo(ctx, R_028DC8_CB_COLOR6_BASE);
-	cb[7] = r600_context_reg_bo(ctx, R_028E04_CB_COLOR7_BASE);
-	cb[8] = r600_context_reg_bo(ctx, R_028E40_CB_COLOR8_BASE);
-	cb[9] = r600_context_reg_bo(ctx, R_028E5C_CB_COLOR9_BASE);
-	cb[10] = r600_context_reg_bo(ctx, R_028E78_CB_COLOR10_BASE);
-	cb[11] = r600_context_reg_bo(ctx, R_028E94_CB_COLOR11_BASE);
-	for (int i = 0; i < 12; i++) {
-		if (cb[i]) {
-			ndwords += 7;
-		}
-	}
-	if (db)
-		ndwords += 7;
-
 	/* queries need some special values */
 	if (ctx->num_query_running) {
 		r600_context_reg(ctx,
@@ -783,6 +777,10 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 				S_02800C_NOOP_CULL_DISABLE(1));
 	}
 
+	/* update the max dword count to make sure we have enough space
+	 * reserved for flushing the destination caches */
+	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16;
+
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
 		r600_context_flush(ctx);
@@ -817,12 +815,41 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
 		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
 	}
-	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, ctx->predicate_drawing);
-	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
+
+	ctx->flags |= (R600_CONTEXT_DRAW_PENDING | R600_CONTEXT_DST_CACHES_DIRTY);
+
+	/* all dirty state have been scheduled in current cs */
+	ctx->pm4_dirty_cdwords = 0;
+}
+
+void evergreen_context_flush_dest_caches(struct r600_context *ctx)
+{
+	struct r600_bo *cb[12];
+	struct r600_bo *db;
+
+	if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
+		return;
+
+	/* find number of color buffer */
+	db = r600_context_reg_bo(ctx, R_028048_DB_Z_READ_BASE);
+	cb[0] = r600_context_reg_bo(ctx, R_028C60_CB_COLOR0_BASE);
+	cb[1] = r600_context_reg_bo(ctx, R_028C9C_CB_COLOR1_BASE);
+	cb[2] = r600_context_reg_bo(ctx, R_028CD8_CB_COLOR2_BASE);
+	cb[3] = r600_context_reg_bo(ctx, R_028D14_CB_COLOR3_BASE);
+	cb[4] = r600_context_reg_bo(ctx, R_028D50_CB_COLOR4_BASE);
+	cb[5] = r600_context_reg_bo(ctx, R_028D8C_CB_COLOR5_BASE);
+	cb[6] = r600_context_reg_bo(ctx, R_028DC8_CB_COLOR6_BASE);
+	cb[7] = r600_context_reg_bo(ctx, R_028E04_CB_COLOR7_BASE);
+	cb[8] = r600_context_reg_bo(ctx, R_028E40_CB_COLOR8_BASE);
+	cb[9] = r600_context_reg_bo(ctx, R_028E5C_CB_COLOR9_BASE);
+	cb[10] = r600_context_reg_bo(ctx, R_028E78_CB_COLOR10_BASE);
+	cb[11] = r600_context_reg_bo(ctx, R_028E94_CB_COLOR11_BASE);
 
 	/* flush color buffer */
 	for (int i = 0; i < 12; i++) {
 		if (cb[i]) {
+			unsigned flush;
+
 			if (i > 7) {
 				flush = (S_0085F0_CB8_DEST_BASE_ENA(1) << (i - 8)) |
 					S_0085F0_CB_ACTION_ENA(1);
@@ -840,7 +867,6 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 					0, db);
 	}
 
-	/* all dirty state have been scheduled in current cs */
-	ctx->pm4_dirty_cdwords = 0;
+	ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
 }
 
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index d0bef8a..8e547da 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -798,7 +798,6 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 				unsigned flush_mask, struct r600_bo *rbo)
 {
 	struct radeon_bo *bo;
-
 	bo = r600_bo_get_bo(rbo);
 	/* if bo has already been flushed */
 	if (!(~bo->last_flush & flush_flags)) {
@@ -1032,6 +1031,17 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx,
 	r600_context_dirty_block(ctx, block, dirty, 2);
 }
 
+static inline void r600_context_ps_partial_flush(struct r600_context *ctx)
+{
+	if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING))
+		return;
+
+	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
+
+	ctx->flags &= ~R600_CONTEXT_DRAW_PENDING;
+}
+
 static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
 {
 	struct r600_range *range;
@@ -1057,6 +1067,12 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex
 		}
 	}
 
+	/* We have to flush the shaders before we change the border color
+	 * registers, or previous draw commands that haven't completed yet
+	 * will end up using the new border color. */
+	if (dirty & R600_BLOCK_STATUS_DIRTY)
+		r600_context_ps_partial_flush(ctx);
+
 	r600_context_dirty_block(ctx, block, dirty, 3);
 }
 
@@ -1136,24 +1152,14 @@ out:
 	LIST_DELINIT(&block->list);
 }
 
-void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
+void r600_context_flush_dest_caches(struct r600_context *ctx)
 {
 	struct r600_bo *cb[8];
 	struct r600_bo *db;
-	unsigned ndwords = 9;
-	struct r600_block *dirty_block = NULL;
-	struct r600_block *next_block;
-	unsigned rv6xx_surface_base_update = 0;
 
-	if (draw->indices) {
-		ndwords = 13;
-		/* make sure there is enough relocation space before scheduling draw */
-		if (ctx->creloc >= (ctx->nreloc - 1)) {
-			r600_context_flush(ctx);
-		}
-	}
+	if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
+		return;
 
-	/* find number of color buffer */
 	db = r600_context_reg_bo(ctx, R_02800C_DB_DEPTH_BASE);
 	cb[0] = r600_context_reg_bo(ctx, R_028040_CB_COLOR0_BASE);
 	cb[1] = r600_context_reg_bo(ctx, R_028044_CB_COLOR1_BASE);
@@ -1163,16 +1169,64 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 	cb[5] = r600_context_reg_bo(ctx, R_028054_CB_COLOR5_BASE);
 	cb[6] = r600_context_reg_bo(ctx, R_028058_CB_COLOR6_BASE);
 	cb[7] = r600_context_reg_bo(ctx, R_02805C_CB_COLOR7_BASE);
+
+	/* flush the color buffers */
 	for (int i = 0; i < 8; i++) {
-		if (cb[i]) {
-			ndwords += 7;
-			rv6xx_surface_base_update |= SURFACE_BASE_UPDATE_COLOR(i);
-		}
+		if (!cb[i])
+			continue;
+
+		r600_context_bo_flush(ctx,
+					(S_0085F0_CB0_DEST_BASE_ENA(1) << i) |
+					S_0085F0_CB_ACTION_ENA(1),
+					0, cb[i]);
 	}
 	if (db) {
-		ndwords += 7;
-		rv6xx_surface_base_update |= SURFACE_BASE_UPDATE_DEPTH;
+		r600_context_bo_flush(ctx, S_0085F0_DB_ACTION_ENA(1), 0, db);
+	}
+
+	ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
+}
+
+void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
+{
+	unsigned ndwords = 7;
+	struct r600_block *dirty_block = NULL;
+	struct r600_block *next_block;
+	unsigned rv6xx_surface_base_update = 0;
+
+	if (draw->indices) {
+		ndwords = 11;
+		/* make sure there is enough relocation space before scheduling draw */
+		if (ctx->creloc >= (ctx->nreloc - 1)) {
+			r600_context_flush(ctx);
+		}
+	}
+
+	/* rv6xx surface base update */
+	if ((ctx->radeon->family > CHIP_R600) &&
+	    (ctx->radeon->family < CHIP_RV770)) {
+		struct r600_bo *cb[8];
+		struct r600_bo *db;
+
+		db = r600_context_reg_bo(ctx, R_02800C_DB_DEPTH_BASE);
+		cb[0] = r600_context_reg_bo(ctx, R_028040_CB_COLOR0_BASE);
+		cb[1] = r600_context_reg_bo(ctx, R_028044_CB_COLOR1_BASE);
+		cb[2] = r600_context_reg_bo(ctx, R_028048_CB_COLOR2_BASE);
+		cb[3] = r600_context_reg_bo(ctx, R_02804C_CB_COLOR3_BASE);
+		cb[4] = r600_context_reg_bo(ctx, R_028050_CB_COLOR4_BASE);
+		cb[5] = r600_context_reg_bo(ctx, R_028054_CB_COLOR5_BASE);
+		cb[6] = r600_context_reg_bo(ctx, R_028058_CB_COLOR6_BASE);
+		cb[7] = r600_context_reg_bo(ctx, R_02805C_CB_COLOR7_BASE);
+		for (int i = 0; i < 8; i++) {
+			if (cb[i]) {
+				rv6xx_surface_base_update |= SURFACE_BASE_UPDATE_COLOR(i);
+			}
+		}
+		if (db) {
+			rv6xx_surface_base_update |= SURFACE_BASE_UPDATE_DEPTH;
+		}
 	}
+
 	/* XXX also need to update SURFACE_BASE_UPDATE_STRMOUT when we support it */
 
 	/* queries need some special values */
@@ -1189,6 +1243,10 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 				S_028D10_NOOP_CULL_DISABLE(1));
 	}
 
+	/* update the max dword count to make sure we have enough space
+	 * reserved for flushing the destination caches */
+	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16;
+
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
 		r600_context_flush(ctx);
@@ -1198,7 +1256,6 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 		R600_ERR("context is too big to be scheduled\n");
 		return;
 	}
-
 	/* enough room to copy packet */
 	LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->dirty, list) {
 		r600_context_block_emit_dirty(ctx, dirty_block);
@@ -1227,21 +1284,8 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
 		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
 	}
-	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, ctx->predicate_drawing);
-	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
 
-	/* flush color buffer */
-	for (int i = 0; i < 8; i++) {
-		if (cb[i]) {
-			r600_context_bo_flush(ctx,
-					(S_0085F0_CB0_DEST_BASE_ENA(1) << i) |
-					S_0085F0_CB_ACTION_ENA(1),
-					0, cb[i]);
-		}
-	}
-	if (db) {
-		r600_context_bo_flush(ctx, S_0085F0_DB_ACTION_ENA(1), 0, db);
-	}
+	ctx->flags |= (R600_CONTEXT_DST_CACHES_DIRTY | R600_CONTEXT_DRAW_PENDING);
 
 	/* all dirty state have been scheduled in current cs */
 	ctx->pm4_dirty_cdwords = 0;
@@ -1261,9 +1305,12 @@ void r600_context_flush(struct r600_context *ctx)
 	/* suspend queries */
 	r600_context_queries_suspend(ctx);
 
+	if (ctx->radeon->family >= CHIP_CEDAR)
+		evergreen_context_flush_dest_caches(ctx);
+	else
+		r600_context_flush_dest_caches(ctx);
+
 	/* emit fence */
-	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
 	ctx->pm4[ctx->pm4_cdwords++] = 0;
@@ -1311,6 +1358,7 @@ void r600_context_flush(struct r600_context *ctx)
 	ctx->creloc = 0;
 	ctx->pm4_dirty_cdwords = 0;
 	ctx->pm4_cdwords = 0;
+	ctx->flags = 0;
 
 	/* resume queries */
 	r600_context_queries_resume(ctx);




More information about the mesa-commit mailing list