[Mesa-dev] [PATCH 01/12] radeonsi: implement SDMA-based buffer clearing for CIK-VI

Marek Olšák maraeo at gmail.com
Mon Jan 2 22:54:06 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeon/r600_pipe_common.c |  9 ++++++
 src/gallium/drivers/radeon/r600_pipe_common.h |  3 ++
 src/gallium/drivers/radeonsi/cik_sdma.c       | 42 +++++++++++++++++++++++++++
 3 files changed, 54 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index d45a385..53e0ed6 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -530,20 +530,28 @@ bool r600_check_device_reset(struct r600_common_context *rctx)
 		return false;
 
 	status = rctx->b.get_device_reset_status(&rctx->b);
 	if (status == PIPE_NO_RESET)
 		return false;
 
 	rctx->device_reset_callback.reset(rctx->device_reset_callback.data, status);
 	return true;
 }
 
+static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx,
+					   struct pipe_resource *dst,
+					   uint64_t offset, uint64_t size,
+					   unsigned value)
+{
+	ctx->clear_buffer(ctx, dst, offset, size, &value, 4);
+}
+
 bool r600_common_context_init(struct r600_common_context *rctx,
 			      struct r600_common_screen *rscreen,
 			      unsigned context_flags)
 {
 	slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
 
 	rctx->screen = rscreen;
 	rctx->ws = rscreen->ws;
 	rctx->family = rscreen->family;
 	rctx->chip_class = rscreen->chip_class;
@@ -556,20 +564,21 @@ bool r600_common_context_init(struct r600_common_context *rctx,
 		rctx->max_db = 4;
 
 	rctx->b.invalidate_resource = r600_invalidate_resource;
 	rctx->b.transfer_map = u_transfer_map_vtbl;
 	rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
 	rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
 	rctx->b.texture_subdata = u_default_texture_subdata;
 	rctx->b.memory_barrier = r600_memory_barrier;
 	rctx->b.flush = r600_flush_from_st;
 	rctx->b.set_debug_callback = r600_set_debug_callback;
+	rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
 
 	/* evergreen_compute.c has a special codepath for global buffers.
 	 * Everything else can use the direct path.
 	 */
 	if ((rscreen->chip_class == EVERGREEN || rscreen->chip_class == CAYMAN) &&
 	    (context_flags & PIPE_CONTEXT_COMPUTE_ONLY))
 		rctx->b.buffer_subdata = u_default_buffer_subdata;
 	else
 		rctx->b.buffer_subdata = r600_buffer_subdata;
 
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 8ebaed7..faf763c 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -622,20 +622,23 @@ struct r600_common_context {
 
 	/* Copy one resource to another using async DMA. */
 	void (*dma_copy)(struct pipe_context *ctx,
 			 struct pipe_resource *dst,
 			 unsigned dst_level,
 			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
 			 struct pipe_resource *src,
 			 unsigned src_level,
 			 const struct pipe_box *src_box);
 
+	void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
+				 uint64_t offset, uint64_t size, unsigned value);
+
 	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
 			     uint64_t offset, uint64_t size, unsigned value,
 			     enum r600_coherency coher);
 
 	void (*blit_decompress_depth)(struct pipe_context *ctx,
 				      struct r600_texture *texture,
 				      struct r600_texture *staging,
 				      unsigned first_level, unsigned last_level,
 				      unsigned first_layer, unsigned last_layer,
 				      unsigned first_sample, unsigned last_sample);
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index b3eaed5..698f8f6 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -75,20 +75,61 @@ static void cik_sdma_copy_buffer(struct si_context *ctx,
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
 	util_range_add(&rdst->valid_buffer_range, dst_offset,
 		       dst_offset + size);
 
 	cik_sdma_do_copy_buffer(ctx, dst, src, dst_offset, src_offset, size);
 	r600_dma_emit_wait_idle(&ctx->b);
 }
 
+static void cik_sdma_clear_buffer(struct pipe_context *ctx,
+				  struct pipe_resource *dst,
+				  uint64_t offset,
+				  uint64_t size,
+				  unsigned clear_value)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct radeon_winsys_cs *cs = sctx->b.dma.cs;
+	unsigned i, ncopy, csize;
+	struct r600_resource *rdst = r600_resource(dst);
+
+	if (!cs || offset % 4 != 0 || size % 4 != 0) {
+		ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
+		return;
+	}
+
+	/* Mark the buffer range of destination as valid (initialized),
+	 * so that transfer_map knows it should wait for the GPU when mapping
+	 * that range. */
+	util_range_add(&rdst->valid_buffer_range, offset, offset + size);
+
+	offset += rdst->gpu_address;
+
+	/* the same maximum size as for copying */
+	ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
+	r600_need_dma_space(&sctx->b, ncopy * 5, rdst, NULL);
+
+	for (i = 0; i < ncopy; i++) {
+		csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
+		radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_PACKET_CONSTANT_FILL, 0,
+						0x8000 /* dword copy */));
+		radeon_emit(cs, offset);
+		radeon_emit(cs, offset >> 32);
+		radeon_emit(cs, clear_value);
+		radeon_emit(cs, csize);
+		offset += csize;
+		size -= csize;
+	}
+	r600_dma_emit_wait_idle(&sctx->b);
+}
+
 static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
 {
 	width = u_minify(width, level);
 	return DIV_ROUND_UP(width, blk_w);
 }
 
 static unsigned encode_tile_info(struct si_context *sctx,
 				 struct r600_texture *tex, unsigned level,
 				 bool set_bpp)
 {
@@ -518,11 +559,12 @@ static void cik_sdma_copy(struct pipe_context *ctx,
 		return;
 
 fallback:
 	si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
 				src, src_level, src_box);
 }
 
 void cik_init_sdma_functions(struct si_context *sctx)
 {
 	sctx->b.dma_copy = cik_sdma_copy;
+	sctx->b.dma_clear_buffer = cik_sdma_clear_buffer;
 }
-- 
2.7.4



More information about the mesa-dev mailing list