[Mesa-dev] [PATCH 06/13] radeonsi: only flush the right set of caches for CP DMA operations

Marek Olšák maraeo at gmail.com
Mon Jan 5 12:20:56 PST 2015


From: Marek Olšák <marek.olsak at amd.com>

The right set is either the framebuffer caches or the caches for shader
resources; callers select it with the new is_framebuffer parameter.
The motivation is that the framebuffer caches very rarely need to be
flushed here.
---
 src/gallium/drivers/r600/r600_blit.c          |  3 +-
 src/gallium/drivers/radeon/r600_pipe_common.c |  5 +--
 src/gallium/drivers/radeon/r600_pipe_common.h |  6 ++--
 src/gallium/drivers/radeon/r600_texture.c     |  8 +++--
 src/gallium/drivers/radeon/radeon_video.c     |  3 +-
 src/gallium/drivers/radeonsi/si_blit.c        |  2 +-
 src/gallium/drivers/radeonsi/si_descriptors.c | 51 +++++++++++++++------------
 src/gallium/drivers/radeonsi/si_pipe.c        |  2 +-
 src/gallium/drivers/radeonsi/si_state.h       |  2 +-
 9 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index bdc5f9f..01262a5 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -565,7 +565,8 @@ static void r600_copy_global_buffer(struct pipe_context *ctx,
 }
 
 static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
-			      unsigned offset, unsigned size, unsigned value)
+			      unsigned offset, unsigned size, unsigned value,
+			      bool is_framebuffer)
 {
 	struct r600_context *rctx = (struct r600_context*)ctx;
 
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index d4133d8..8449a1f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -905,12 +905,13 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
 }
 
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
-			      unsigned offset, unsigned size, unsigned value)
+			      unsigned offset, unsigned size, unsigned value,
+			      bool is_framebuffer)
 {
 	struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
 
 	pipe_mutex_lock(rscreen->aux_context_lock);
-	rctx->clear_buffer(&rctx->b, dst, offset, size, value);
+	rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer);
 	rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
 	pipe_mutex_unlock(rscreen->aux_context_lock);
 }
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 15736d7..a9416b6 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -388,7 +388,8 @@ struct r600_common_context {
 			 const struct pipe_box *src_box);
 
 	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
-			     unsigned offset, unsigned size, unsigned value);
+			     unsigned offset, unsigned size, unsigned value,
+			     bool is_framebuffer);
 
 	void (*blit_decompress_depth)(struct pipe_context *ctx,
 				      struct r600_texture *texture,
@@ -441,7 +442,8 @@ void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resour
 bool r600_can_dump_shader(struct r600_common_screen *rscreen,
 			  const struct tgsi_token *tokens);
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
-			      unsigned offset, unsigned size, unsigned value);
+			      unsigned offset, unsigned size, unsigned value,
+			      bool is_framebuffer);
 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
 						  const struct pipe_resource *templ);
 const char *r600_get_llvm_processor_name(enum radeon_family family);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index fdf4d76..ab8ce7b 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -559,7 +559,8 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
 		 * without htile buffer */
 		R600_ERR("Failed to create buffer object for htile buffer.\n");
 	} else {
-		r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, htile_size, 0);
+		r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
+					 htile_size, 0, true);
 	}
 }
 
@@ -638,7 +639,8 @@ r600_texture_create_object(struct pipe_screen *screen,
 	if (rtex->cmask.size) {
 		/* Initialize the cmask to 0xCC (= compressed state). */
 		r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
-					 rtex->cmask.offset, rtex->cmask.size, 0xCCCCCCCC);
+					 rtex->cmask.offset, rtex->cmask.size,
+					 0xCCCCCCCC, true);
 	}
 
 	/* Initialize the CMASK base register value. */
@@ -1273,7 +1275,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 		/* Do the fast clear. */
 		evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
 		rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
-				   tex->cmask.offset, tex->cmask.size, 0);
+				   tex->cmask.offset, tex->cmask.size, 0, true);
 
 		tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
 		fb_state->dirty = true;
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index f6cfdff..1420798 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -122,7 +122,8 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
 {
 	struct r600_common_context *rctx = (struct r600_common_context*)context;
 
-	rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size, 0);
+	rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size,
+			   0, false);
 	context->flush(context, NULL, 0);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 4744154..1f2c408 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -556,7 +556,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
 
 	/* Fallback for buffers. */
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
-		si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
+		si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false);
 		return;
 	}
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index d46f4e5..c959961 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1054,9 +1054,11 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 #define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
 
 static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
-			    unsigned offset, unsigned size, unsigned value)
+			    unsigned offset, unsigned size, unsigned value,
+			    bool is_framebuffer)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
+	unsigned flush_flags;
 
 	if (!size)
 		return;
@@ -1081,12 +1083,15 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 	uint64_t va = r600_resource(dst)->gpu_address + offset;
 
 	/* Flush the caches where the resource is bound. */
-	/* XXX only flush the caches where the buffer is bound. */
-	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
-			 SI_CONTEXT_INV_TC_L2 |
-			 SI_CONTEXT_INV_KCACHE |
-			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
-	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+	if (is_framebuffer)
+		flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+	else
+		flush_flags = SI_CONTEXT_INV_TC_L1 |
+			      SI_CONTEXT_INV_TC_L2 |
+			      SI_CONTEXT_INV_KCACHE;
+
+	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+			 flush_flags;
 
 	while (size) {
 		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
@@ -1120,17 +1125,16 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 
 	/* Flush the caches again in case the 3D engine has been prefetching
 	 * the resource. */
-	/* XXX only flush the caches where the buffer is bound. */
-	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
-			 SI_CONTEXT_INV_TC_L2 |
-			 SI_CONTEXT_INV_KCACHE |
-			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+	sctx->b.flags |= flush_flags;
 }
 
 void si_copy_buffer(struct si_context *sctx,
 		    struct pipe_resource *dst, struct pipe_resource *src,
-		    uint64_t dst_offset, uint64_t src_offset, unsigned size)
+		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
+		    bool is_framebuffer)
 {
+	unsigned flush_flags;
+
 	if (!size)
 		return;
 
@@ -1144,11 +1148,15 @@ void si_copy_buffer(struct si_context *sctx,
 	src_offset += r600_resource(src)->gpu_address;
 
 	/* Flush the caches where the resource is bound. */
-	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
-			 SI_CONTEXT_INV_TC_L2 |
-			 SI_CONTEXT_INV_KCACHE |
-			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
-			 SI_CONTEXT_PS_PARTIAL_FLUSH;
+	if (is_framebuffer)
+		flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+	else
+		flush_flags = SI_CONTEXT_INV_TC_L1 |
+			      SI_CONTEXT_INV_TC_L2 |
+			      SI_CONTEXT_INV_KCACHE;
+
+	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+			 flush_flags;
 
 	while (size) {
 		unsigned sync_flags = 0;
@@ -1180,10 +1188,9 @@ void si_copy_buffer(struct si_context *sctx,
 		dst_offset += byte_count;
 	}
 
-	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
-			 SI_CONTEXT_INV_TC_L2 |
-			 SI_CONTEXT_INV_KCACHE |
-			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+	/* Flush the caches again in case the 3D engine has been prefetching
+	 * the resource. */
+	sctx->b.flags |= flush_flags;
 }
 
 /* INIT/DEINIT */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 4b66499..8352c0e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -151,7 +151,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
 
 		/* Clear the NULL constant buffer, because loads should return zeros. */
 		sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
-				     sctx->null_const_buf.buffer->width0, 0);
+				     sctx->null_const_buf.buffer->width0, 0, false);
 	}
 
 	return &sctx->b.b;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 8927e50..3cd252c 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -238,7 +238,7 @@ void si_release_all_descriptors(struct si_context *sctx);
 void si_all_descriptors_begin_new_cs(struct si_context *sctx);
 void si_copy_buffer(struct si_context *sctx,
 		    struct pipe_resource *dst, struct pipe_resource *src,
-		    uint64_t dst_offset, uint64_t src_offset, unsigned size);
+		    uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer);
 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
 			    const uint8_t *ptr, unsigned size, uint32_t *const_offset);
 
-- 
2.1.0



More information about the mesa-dev mailing list