[Mesa-dev] [PATCH 13/18] radeonsi: implement clear_buffer using CP DMA, initialize CMASK with it

Marek Olšák maraeo at gmail.com
Wed Sep 25 18:35:33 PDT 2013


From: Marek Olšák <marek.olsak at amd.com>

More work needs to be done for this to be entirely shared with r600g.
I'm just trying to share r600_texture.c now.

I put the implementation in si_descriptors.c because the emit function
was already there.
---
 src/gallium/drivers/radeonsi/r600_texture.c   |  5 +-
 src/gallium/drivers/radeonsi/radeonsi_pipe.c  | 32 +++++-----
 src/gallium/drivers/radeonsi/si_descriptors.c | 85 ++++++++++++++++++++++++++-
 src/gallium/drivers/radeonsi/si_state_draw.c  |  5 +-
 4 files changed, 108 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/r600_texture.c b/src/gallium/drivers/radeonsi/r600_texture.c
index aa307fb..53452c8 100644
--- a/src/gallium/drivers/radeonsi/r600_texture.c
+++ b/src/gallium/drivers/radeonsi/r600_texture.c
@@ -462,8 +462,9 @@ r600_texture_create_object(struct pipe_screen *screen,
 
 	if (rtex->cmask.size) {
 		/* Initialize the cmask to 0xCC (= compressed state). */
-		char *map = rscreen->b.ws->buffer_map(resource->cs_buf, NULL, PIPE_TRANSFER_WRITE);
-		memset(map + rtex->cmask.offset, 0xCC, rtex->cmask.size);
+		r600_screen_clear_buffer(&rscreen->b, &resource->b.b,
+					 rtex->cmask.offset, rtex->cmask.size,
+					 0xCCCCCCCC);
 	}
 
 	if (rscreen->b.debug_flags & DBG_TEX_DEPTH && rtex->is_depth) {
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index 5528e19..8ed5d26 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -653,6 +653,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
 	if (!radeon_winsys_unref(rscreen->b.ws))
 		return;
 
+	r600_common_screen_cleanup(&rscreen->b);
+
 	if (rscreen->fences.bo) {
 		struct r600_fence_block *entry, *tmp;
 
@@ -823,18 +825,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 		return NULL;
 	}
 
-	r600_common_screen_init(&rscreen->b, ws);
-
-	if (debug_get_bool_option("RADEON_PRINT_TEXDEPTH", FALSE))
-		rscreen->b.debug_flags |= DBG_TEX_DEPTH;
-	if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
-		rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
-
-	if (r600_init_tiling(rscreen)) {
-		FREE(rscreen);
-		return NULL;
-	}
-
+	/* Set functions first. */
+	rscreen->b.b.context_create = r600_create_context;
 	rscreen->b.b.destroy = r600_destroy_screen;
 	rscreen->b.b.get_name = r600_get_name;
 	rscreen->b.b.get_vendor = r600_get_vendor;
@@ -844,12 +836,9 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 	rscreen->b.b.get_compute_param = r600_get_compute_param;
 	rscreen->b.b.get_timestamp = r600_get_timestamp;
 	rscreen->b.b.is_format_supported = si_is_format_supported;
-	rscreen->b.b.context_create = r600_create_context;
 	rscreen->b.b.fence_reference = r600_fence_reference;
 	rscreen->b.b.fence_signalled = r600_fence_signalled;
 	rscreen->b.b.fence_finish = r600_fence_finish;
-	r600_init_screen_resource_functions(&rscreen->b.b);
-
 	if (rscreen->b.info.has_uvd) {
 		rscreen->b.b.get_video_param = ruvd_get_video_param;
 		rscreen->b.b.is_video_format_supported = ruvd_is_format_supported;
@@ -857,6 +846,19 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 		rscreen->b.b.get_video_param = r600_get_video_param;
 		rscreen->b.b.is_video_format_supported = vl_video_buffer_is_format_supported;
 	}
+	r600_init_screen_resource_functions(&rscreen->b.b);
+
+	r600_common_screen_init(&rscreen->b, ws);
+
+	if (debug_get_bool_option("RADEON_PRINT_TEXDEPTH", FALSE))
+		rscreen->b.debug_flags |= DBG_TEX_DEPTH;
+	if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
+		rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
+
+	if (r600_init_tiling(rscreen)) {
+		FREE(rscreen);
+		return NULL;
+	}
 
 	util_format_s3tc_init();
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index a8f8781..93d3684 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -44,7 +44,7 @@ static uint32_t null_desc[8]; /* zeros */
 #define SI_CP_DMA_RAW_WAIT	(1 << 1) /* SI+ */
 
 /* Emit a CP DMA packet to do a copy from one buffer to another.
- * The size must fit in bits [20:0]. Notes:
+ * The size must fit in bits [20:0].
  */
 static void si_emit_cp_dma_copy_buffer(struct r600_context *rctx,
 				       uint64_t dst_va, uint64_t src_va,
@@ -517,6 +517,88 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 	si_update_descriptors(rctx, &buffers->desc);
 }
 
+/* CP DMA */
+
+/* The max number of bytes to copy per packet. */
+#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
+
+/* Fill "size" bytes of "dst", starting at byte "offset", with the 32-bit
+ * pattern "value". Dword-aligned clears are emitted as CP DMA packets;
+ * unaligned ones fall back to a byte-wise CPU write through a mapping.
+ * The cleared range is added to the resource's valid_buffer_range. */
+static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
+			    unsigned offset, unsigned size, unsigned value)
+{
+	struct r600_context *rctx = (struct r600_context*)ctx;
+	/* XXX only flush the caches where the buffer is bound. */
+	const unsigned flush_flags = R600_CONTEXT_INV_TEX_CACHE |
+				     R600_CONTEXT_INV_CONST_CACHE |
+				     R600_CONTEXT_FLUSH_AND_INV_CB |
+				     R600_CONTEXT_FLUSH_AND_INV_DB |
+				     R600_CONTEXT_FLUSH_AND_INV_CB_META |
+				     R600_CONTEXT_FLUSH_AND_INV_DB_META;
+
+	if (!size)
+		return;
+
+	/* Record the range now: the CP DMA loop below counts "size" down to
+	 * zero, so doing this afterwards would add an empty range. */
+	util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
+		       offset + size);
+
+	/* Fallback for unaligned clears. */
+	if (offset % 4 != 0 || size % 4 != 0) {
+		uint8_t *map = rctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
+						      rctx->b.rings.gfx.cs,
+						      PIPE_TRANSFER_WRITE);
+		if (!map)
+			return;
+		/* Write byte by byte from "offset", picking the byte of "value"
+		 * that matches each address' position within its dword. */
+		map += offset;
+		for (unsigned i = 0; i < size; i++)
+			*map++ = (value >> (((offset + i) % 4) * 8)) & 0xff;
+		return;
+	}
+
+	uint64_t va = r600_resource_va(&rctx->screen->b.b, dst) + offset;
+
+	/* Flush the caches where the resource is bound and request a 3D-idle wait. */
+	rctx->b.flags |= flush_flags | R600_CONTEXT_WAIT_3D_IDLE;
+
+	while (size) {
+		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+		unsigned dma_flags = 0;
+
+		si_need_cs_space(rctx, 7 + (rctx->b.flags ? rctx->cache_flush.num_dw : 0),
+				 FALSE);
+
+		/* This must be done after need_cs_space. */
+		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+				      (struct r600_resource*)dst, RADEON_USAGE_WRITE);
+
+		/* Flush the caches for the first copy only.
+		 * Also wait for the previous CP DMA operations. */
+		if (rctx->b.flags) {
+			si_emit_cache_flush(&rctx->b, NULL);
+			dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
+		}
+
+		/* Do the synchronization after the last copy, so that all data is written to memory. */
+		if (size == byte_count)
+			dma_flags |= R600_CP_DMA_SYNC;
+
+		/* Emit the clear packet. */
+		si_emit_cp_dma_clear_buffer(rctx, va, byte_count, value, dma_flags);
+
+		size -= byte_count;
+		va += byte_count;
+	}
+
+	/* Flush again in case the 3D engine has been prefetching the resource. */
+	rctx->b.flags |= flush_flags;
+}
+
 /* INIT/DEINIT */
 
 void si_init_all_descriptors(struct r600_context *rctx)
@@ -541,6 +623,7 @@ void si_init_all_descriptors(struct r600_context *rctx)
 	/* Set pipe_context functions. */
 	rctx->b.b.set_constant_buffer = si_set_constant_buffer;
 	rctx->b.b.set_stream_output_targets = si_set_streamout_targets;
+	rctx->b.clear_buffer = si_clear_buffer;
 }
 
 void si_release_all_descriptors(struct r600_context *rctx)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index cb5055a..0213523 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -677,7 +677,10 @@ void si_emit_cache_flush(struct r600_common_context *rctx, struct r600_atom *ato
 		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
 	}
 
-	if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
+	if (rctx->flags & R600_CONTEXT_WAIT_3D_IDLE) {
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+	} else if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
 		/* Needed if streamout buffers are going to be used as a source. */
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-- 
1.8.1.2



More information about the mesa-dev mailing list