[Mesa-dev] [PATCH 12/15] radeonsi: split si_copy_buffer

Marek Olšák maraeo at gmail.com
Wed Aug 29 20:13:08 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

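Move the CP DMA implementation into si_cp_dma_copy_buffer and keep
si_copy_buffer as a thin wrapper around it. Compute and SDMA paths
will be added to si_copy_buffer later.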
---
 src/gallium/drivers/radeonsi/si_blit.c        |  2 +-
 src/gallium/drivers/radeonsi/si_cp_dma.c      | 33 ++++++++++++-------
 src/gallium/drivers/radeonsi/si_pipe.c        |  3 +-
 src/gallium/drivers/radeonsi/si_pipe.h        |  8 +++--
 .../drivers/radeonsi/si_test_dma_perf.c       |  4 +--
 5 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index fcaff80125c..8f7aa0815b9 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -903,21 +903,21 @@ void si_resource_copy_region(struct pipe_context *ctx,
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_texture *ssrc = (struct si_texture*)src;
 	struct pipe_surface *dst_view, dst_templ;
 	struct pipe_sampler_view src_templ, *src_view;
 	unsigned dst_width, dst_height, src_width0, src_height0;
 	unsigned dst_width0, dst_height0, src_force_level = 0;
 	struct pipe_box sbox, dstbox;
 
 	/* Handle buffers first. */
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
-		si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, 0, -1);
+		si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
 		return;
 	}
 
 	assert(u_max_sample(dst) == u_max_sample(src));
 
 	/* The driver doesn't decompress resources automatically while
 	 * u_blitter is rendering. */
 	si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level,
 				  src_box->z, src_box->z + src_box->depth - 1);
 
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index ad53682b1b2..e85bb9b1acf 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -426,36 +426,32 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size,
 	va = sctx->scratch_buffer->gpu_address;
 	si_emit_cp_dma(sctx, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags,
 		       cache_policy);
 }
 
 /**
  * Do memcpy between buffers using CP DMA.
  *
  * \param user_flags	bitmask of SI_CPDMA_*
  */
-void si_copy_buffer(struct si_context *sctx,
-		    struct pipe_resource *dst, struct pipe_resource *src,
-		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
-		    unsigned user_flags, enum si_cache_policy cache_policy)
+void si_cp_dma_copy_buffer(struct si_context *sctx,
+			   struct pipe_resource *dst, struct pipe_resource *src,
+			   uint64_t dst_offset, uint64_t src_offset, unsigned size,
+			   unsigned user_flags, enum si_coherency coher,
+			   enum si_cache_policy cache_policy)
 {
 	uint64_t main_dst_offset, main_src_offset;
 	unsigned skipped_size = 0;
 	unsigned realign_size = 0;
-	enum si_coherency coher = SI_COHERENCY_SHADER;
 	bool is_first = true;
 
-	if (!size)
-		return;
-
-	if (cache_policy == -1)
-		cache_policy = get_cache_policy(sctx, coher);
+	assert(size);
 
 	if (dst != src || dst_offset != src_offset) {
 		/* Mark the buffer range of destination as valid (initialized),
 		 * so that transfer_map knows it should wait for the GPU when mapping
 		 * that range. */
 		util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
 			       dst_offset + size);
 	}
 
 	dst_offset += r600_resource(dst)->gpu_address;
@@ -520,35 +516,50 @@ void si_copy_buffer(struct si_context *sctx,
 
 		si_emit_cp_dma(sctx, dst_offset, src_offset, skipped_size,
 			       dma_flags, cache_policy);
 	}
 
 	/* Finally, realign the engine if the size wasn't aligned. */
 	if (realign_size) {
 		si_cp_dma_realign_engine(sctx, realign_size, user_flags, coher,
 					 cache_policy, &is_first);
 	}
+}
+
+void si_copy_buffer(struct si_context *sctx,
+		    struct pipe_resource *dst, struct pipe_resource *src,
+		    uint64_t dst_offset, uint64_t src_offset, unsigned size)
+{
+	enum si_coherency coher = SI_COHERENCY_SHADER;
+	enum si_cache_policy cache_policy = get_cache_policy(sctx, coher);
+
+	if (!size)
+		return;
+
+	si_cp_dma_copy_buffer(sctx, dst, src, dst_offset, src_offset, size,
+			      0, coher, cache_policy);
 
 	if (cache_policy != L2_BYPASS)
 		r600_resource(dst)->TC_L2_dirty = true;
 
 	/* If it's not a prefetch... */
 	if (dst_offset != src_offset)
 		sctx->num_cp_dma_calls++;
 }
 
 void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
 			      uint64_t offset, unsigned size)
 {
 	assert(sctx->chip_class >= CIK);
 
-	si_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL, L2_LRU);
+	si_cp_dma_copy_buffer(sctx, buf, buf, offset, offset, size,
+			      SI_CPDMA_SKIP_ALL, SI_COHERENCY_SHADER, L2_LRU);
 }
 
 static void cik_prefetch_shader_async(struct si_context *sctx,
 				      struct si_pm4_state *state)
 {
 	struct pipe_resource *bo = &state->bo[0]->b.b;
 	assert(state->nbo == 1);
 
 	cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index bce7b3f550e..8ef29c25df2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -723,21 +723,22 @@ static void si_test_vmfault(struct si_screen *sscreen)
 		pipe_buffer_create_const0(&sscreen->b, 0, PIPE_USAGE_DEFAULT, 64);
 
 	if (!buf) {
 		puts("Buffer allocation failed.");
 		exit(1);
 	}
 
 	r600_resource(buf)->gpu_address = 0; /* cause a VM fault */
 
 	if (sscreen->debug_flags & DBG(TEST_VMFAULT_CP)) {
-		si_copy_buffer(sctx, buf, buf, 0, 4, 4, 0, -1);
+		si_cp_dma_copy_buffer(sctx, buf, buf, 0, 4, 4, 0,
+				      SI_COHERENCY_NONE, L2_BYPASS);
 		ctx->flush(ctx, NULL, 0);
 		puts("VM fault test: CP - done.");
 	}
 	if (sscreen->debug_flags & DBG(TEST_VMFAULT_SDMA)) {
 		si_sdma_clear_buffer(sctx, buf, 0, 4, 0);
 		ctx->flush(ctx, NULL, 0);
 		puts("VM fault test: SDMA - done.");
 	}
 	if (sscreen->debug_flags & DBG(TEST_VMFAULT_SHADER)) {
 		util_test_constant_buffer(ctx, buf);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index a6f09b65f74..29d7e555a0c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1120,24 +1120,28 @@ enum si_coherency {
 };
 
 void si_cp_dma_wait_for_idle(struct si_context *sctx);
 void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 			    uint64_t offset, uint64_t size, unsigned value,
 			    enum si_coherency coher,
 			    enum si_cache_policy cache_policy);
 void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 		     uint64_t offset, uint64_t size, unsigned value,
 		     enum si_coherency coher);
+void si_cp_dma_copy_buffer(struct si_context *sctx,
+			   struct pipe_resource *dst, struct pipe_resource *src,
+			   uint64_t dst_offset, uint64_t src_offset, unsigned size,
+			   unsigned user_flags, enum si_coherency coher,
+			   enum si_cache_policy cache_policy);
 void si_copy_buffer(struct si_context *sctx,
 		    struct pipe_resource *dst, struct pipe_resource *src,
-		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
-		    unsigned user_flags, enum si_cache_policy cache_policy);
+		    uint64_t dst_offset, uint64_t src_offset, unsigned size);
 void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
 			      uint64_t offset, unsigned size);
 void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only);
 void si_init_cp_dma_functions(struct si_context *sctx);
 
 /* si_debug.c */
 void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
 		struct radeon_saved_cs *saved, bool get_buffer_list);
 void si_clear_saved_cs(struct radeon_saved_cs *saved);
 void si_destroy_saved_cs(struct si_saved_cs *scs);
diff --git a/src/gallium/drivers/radeonsi/si_test_dma_perf.c b/src/gallium/drivers/radeonsi/si_test_dma_perf.c
index f097a642999..6c04720e963 100644
--- a/src/gallium/drivers/radeonsi/si_test_dma_perf.c
+++ b/src/gallium/drivers/radeonsi/si_test_dma_perf.c
@@ -171,22 +171,22 @@ void si_test_dma_perf(struct si_screen *sscreen)
 				src = is_copy ? pipe_buffer_create(screen, 0, src_usage, size) : NULL;
 
 				/* Run tests. */
 				for (unsigned iter = 0; iter < NUM_RUNS; iter++) {
 					q[iter] = ctx->create_query(ctx, query_type, 0);
 					ctx->begin_query(ctx, q[iter]);
 
 					if (test_cp) {
 						/* CP DMA */
 						if (is_copy) {
-							si_copy_buffer(sctx, dst, src, 0, 0, size, 0,
-								       cache_policy);
+							si_cp_dma_copy_buffer(sctx, dst, src, 0, 0, size, 0,
+									      SI_COHERENCY_NONE, cache_policy);
 						} else {
 							si_cp_dma_clear_buffer(sctx, dst, 0, size, clear_value,
 									       SI_COHERENCY_NONE, cache_policy);
 						}
 					} else if (test_sdma) {
 						/* SDMA */
 						if (is_copy) {
 							struct pipe_box box;
 							u_box_1d(0, size, &box);
 							sctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, 0, &box);
-- 
2.17.1



More information about the mesa-dev mailing list