Mesa (master): radeonsi: move SI and CIK+ SDMA code into 1 common function for cleanups

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Jan 6 20:41:55 UTC 2020


Module: Mesa
Branch: master
Commit: 991328498b9b1fa2937c61546bf1f3f4e5949f93
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=991328498b9b1fa2937c61546bf1f3f4e5949f93

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Thu Jan  2 17:02:12 2020 -0500

radeonsi: move SI and CIK+ SDMA code into 1 common function for cleanups

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Reviewed-By: Timur Kristóf <timur.kristof at gmail.com>

---

 src/gallium/drivers/radeonsi/Makefile.sources   |   1 -
 src/gallium/drivers/radeonsi/cik_sdma.c         |  60 +------------
 src/gallium/drivers/radeonsi/meson.build        |   1 -
 src/gallium/drivers/radeonsi/si_blit.c          |   1 -
 src/gallium/drivers/radeonsi/si_buffer.c        |   6 +-
 src/gallium/drivers/radeonsi/si_dma.c           | 110 ------------------------
 src/gallium/drivers/radeonsi/si_dma_cs.c        |  89 +++++++++++++++++++
 src/gallium/drivers/radeonsi/si_gfx_cs.c        |   6 +-
 src/gallium/drivers/radeonsi/si_pipe.c          |   2 +-
 src/gallium/drivers/radeonsi/si_pipe.h          |   6 +-
 src/gallium/drivers/radeonsi/si_test_dma_perf.c |   4 +-
 src/gallium/drivers/radeonsi/si_texture.c       |   8 +-
 12 files changed, 104 insertions(+), 190 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index f25309736c9..886aaf6fa34 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -19,7 +19,6 @@ C_SOURCES := \
 	si_cp_dma.c \
 	si_debug.c \
 	si_descriptors.c \
-	si_dma.c \
 	si_dma_cs.c \
 	si_fence.c \
 	si_get.c \
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 3b07c964eed..df8a2fcd577 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -26,58 +26,6 @@
 #include "sid.h"
 #include "si_pipe.h"
 
-static void cik_sdma_copy_buffer(struct si_context *ctx,
-				 struct pipe_resource *dst,
-				 struct pipe_resource *src,
-				 uint64_t dst_offset,
-				 uint64_t src_offset,
-				 uint64_t size)
-{
-	struct radeon_cmdbuf *cs = ctx->sdma_cs;
-	unsigned i, ncopy, csize;
-	unsigned align = ~0u;
-	struct si_resource *sdst = si_resource(dst);
-	struct si_resource *ssrc = si_resource(src);
-
-	/* Mark the buffer range of destination as valid (initialized),
-	 * so that transfer_map knows it should wait for the GPU when mapping
-	 * that range. */
-	util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
-		       dst_offset + size);
-
-	dst_offset += sdst->gpu_address;
-	src_offset += ssrc->gpu_address;
-
-	ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
-
-	/* Align copy size to dw if src/dst address are dw aligned */
-	if ((src_offset & 0x3) == 0 &&
-	    (dst_offset & 0x3) == 0 &&
-	    size > 4 &&
-	    (size & 3) != 0) {
-		align = ~0x3u;
-		ncopy++;
-	}
-
-	si_need_dma_space(ctx, ncopy * 7, sdst, ssrc);
-
-	for (i = 0; i < ncopy; i++) {
-		csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size;
-		radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
-						CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
-						0));
-		radeon_emit(cs, ctx->chip_class >= GFX9 ? csize - 1 : csize);
-		radeon_emit(cs, 0); /* src/dst endian swap */
-		radeon_emit(cs, src_offset);
-		radeon_emit(cs, src_offset >> 32);
-		radeon_emit(cs, dst_offset);
-		radeon_emit(cs, dst_offset >> 32);
-		dst_offset += csize;
-		src_offset += csize;
-		size -= csize;
-	}
-}
-
 static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
 {
 	width = u_minify(width, level);
@@ -680,17 +628,13 @@ static void cik_sdma_copy(struct pipe_context *ctx,
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
+	assert(src->target != PIPE_BUFFER);
+
 	if (!sctx->sdma_cs ||
 	    src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
 	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE)
 		goto fallback;
 
-	/* If src is a buffer and dst is a texture, we are uploading metadata. */
-	if (src->target == PIPE_BUFFER) {
-		cik_sdma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
-		return;
-	}
-
 	/* SDMA causes corruption. See:
 	 *   https://bugs.freedesktop.org/show_bug.cgi?id=110575
 	 *   https://bugs.freedesktop.org/show_bug.cgi?id=110635
diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build
index 3baf70a020e..d2d3dd684b0 100644
--- a/src/gallium/drivers/radeonsi/meson.build
+++ b/src/gallium/drivers/radeonsi/meson.build
@@ -34,7 +34,6 @@ files_libradeonsi = files(
   'si_cp_dma.c',
   'si_debug.c',
   'si_descriptors.c',
-  'si_dma.c',
   'si_dma_cs.c',
   'si_fence.c',
   'si_get.c',
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 9c48bf42a3a..643b15a09b4 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -1212,7 +1212,6 @@ static void si_blit(struct pipe_context *ctx,
 	 * on failure (recursion).
 	 */
 	if (dst->surface.is_linear &&
-	    sctx->dma_copy &&
 	    util_can_blit_via_copy_region(info, false)) {
 		sctx->dma_copy(ctx, info->dst.resource, info->dst.level,
 				 info->dst.box.x, info->dst.box.y,
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index 220a4cbdcbf..1de431cc937 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -503,9 +503,9 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx,
 				box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT)));
 		if (staging) {
 			/* Copy the VRAM buffer to the staging buffer. */
-			sctx->dma_copy(ctx, &staging->b.b, 0,
-				       box->x % SI_MAP_BUFFER_ALIGNMENT,
-				       0, 0, resource, 0, box);
+			si_sdma_copy_buffer(sctx, &staging->b.b, resource,
+					    box->x % SI_MAP_BUFFER_ALIGNMENT,
+					    box->x, box->width);
 
 			data = si_buffer_map_sync_with_rings(sctx, staging,
 							     usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
deleted file mode 100644
index afcc38bbf01..00000000000
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse at freedesktop.org>
- * Copyright 2018 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sid.h"
-#include "si_pipe.h"
-
-#include "util/format/u_format.h"
-
-static void si_dma_copy_buffer(struct si_context *ctx,
-				struct pipe_resource *dst,
-				struct pipe_resource *src,
-				uint64_t dst_offset,
-				uint64_t src_offset,
-				uint64_t size)
-{
-	struct radeon_cmdbuf *cs = ctx->sdma_cs;
-	unsigned i, ncopy, count, max_size, sub_cmd, shift;
-	struct si_resource *sdst = si_resource(dst);
-	struct si_resource *ssrc = si_resource(src);
-
-	/* Mark the buffer range of destination as valid (initialized),
-	 * so that transfer_map knows it should wait for the GPU when mapping
-	 * that range. */
-	util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
-		       dst_offset + size);
-
-	dst_offset += sdst->gpu_address;
-	src_offset += ssrc->gpu_address;
-
-	/* see whether we should use the dword-aligned or byte-aligned copy */
-	if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) {
-		sub_cmd = SI_DMA_COPY_DWORD_ALIGNED;
-		shift = 2;
-		max_size = SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE;
-	} else {
-		sub_cmd = SI_DMA_COPY_BYTE_ALIGNED;
-		shift = 0;
-		max_size = SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE;
-	}
-
-	ncopy = DIV_ROUND_UP(size, max_size);
-	si_need_dma_space(ctx, ncopy * 5, sdst, ssrc);
-
-	for (i = 0; i < ncopy; i++) {
-		count = MIN2(size, max_size);
-		radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd,
-					      count >> shift));
-		radeon_emit(cs, dst_offset);
-		radeon_emit(cs, src_offset);
-		radeon_emit(cs, (dst_offset >> 32UL) & 0xff);
-		radeon_emit(cs, (src_offset >> 32UL) & 0xff);
-		dst_offset += count;
-		src_offset += count;
-		size -= count;
-	}
-}
-
-static void si_dma_copy(struct pipe_context *ctx,
-			struct pipe_resource *dst,
-			unsigned dst_level,
-			unsigned dstx, unsigned dsty, unsigned dstz,
-			struct pipe_resource *src,
-			unsigned src_level,
-			const struct pipe_box *src_box)
-{
-	struct si_context *sctx = (struct si_context *)ctx;
-
-	if (sctx->sdma_cs == NULL ||
-	    src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
-	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
-		goto fallback;
-	}
-
-	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
-		si_dma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
-		return;
-	}
-
-	/* SI SDMA image copies are unimplemented. */
-fallback:
-	si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
-				src, src_level, src_box);
-}
-
-void si_init_dma_functions(struct si_context *sctx)
-{
-	sctx->dma_copy = si_dma_copy;
-}
diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c
index 8671c027c03..c58b2b103be 100644
--- a/src/gallium/drivers/radeonsi/si_dma_cs.c
+++ b/src/gallium/drivers/radeonsi/si_dma_cs.c
@@ -125,6 +125,95 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 	}
 }
 
+void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
+			 struct pipe_resource *src, uint64_t dst_offset,
+			 uint64_t src_offset, uint64_t size)
+{
+	struct radeon_cmdbuf *cs = sctx->sdma_cs;
+	unsigned i, ncopy, csize;
+	struct si_resource *sdst = si_resource(dst);
+	struct si_resource *ssrc = si_resource(src);
+
+	if (!cs ||
+	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE ||
+	    src->flags & PIPE_RESOURCE_FLAG_SPARSE) {
+		si_copy_buffer(sctx, dst, src, dst_offset, src_offset, size);
+		return;
+	}
+
+	/* Mark the buffer range of destination as valid (initialized),
+	 * so that transfer_map knows it should wait for the GPU when mapping
+	 * that range. */
+	util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
+		       dst_offset + size);
+
+	dst_offset += sdst->gpu_address;
+	src_offset += ssrc->gpu_address;
+
+	if (sctx->chip_class == GFX6) {
+		unsigned max_size, sub_cmd, shift;
+
+		/* see whether we should use the dword-aligned or byte-aligned copy */
+		if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) {
+			sub_cmd = SI_DMA_COPY_DWORD_ALIGNED;
+			shift = 2;
+			max_size = SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE;
+		} else {
+			sub_cmd = SI_DMA_COPY_BYTE_ALIGNED;
+			shift = 0;
+			max_size = SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE;
+		}
+
+		ncopy = DIV_ROUND_UP(size, max_size);
+		si_need_dma_space(sctx, ncopy * 5, sdst, ssrc);
+
+		for (i = 0; i < ncopy; i++) {
+			csize = MIN2(size, max_size);
+			radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd,
+						      csize >> shift));
+			radeon_emit(cs, dst_offset);
+			radeon_emit(cs, src_offset);
+			radeon_emit(cs, (dst_offset >> 32UL) & 0xff);
+			radeon_emit(cs, (src_offset >> 32UL) & 0xff);
+			dst_offset += csize;
+			src_offset += csize;
+			size -= csize;
+		}
+		return;
+	}
+
+	/* The following code is for CI and later. */
+	unsigned align = ~0u;
+	ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
+
+	/* Align copy size to dw if src/dst address are dw aligned */
+	if ((src_offset & 0x3) == 0 &&
+	    (dst_offset & 0x3) == 0 &&
+	    size > 4 &&
+	    (size & 3) != 0) {
+		align = ~0x3u;
+		ncopy++;
+	}
+
+	si_need_dma_space(sctx, ncopy * 7, sdst, ssrc);
+
+	for (i = 0; i < ncopy; i++) {
+		csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size;
+		radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+						CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
+						0));
+		radeon_emit(cs, sctx->chip_class >= GFX9 ? csize - 1 : csize);
+		radeon_emit(cs, 0); /* src/dst endian swap */
+		radeon_emit(cs, src_offset);
+		radeon_emit(cs, src_offset >> 32);
+		radeon_emit(cs, dst_offset);
+		radeon_emit(cs, dst_offset >> 32);
+		dst_offset += csize;
+		src_offset += csize;
+		size -= csize;
+	}
+}
+
 void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
 		       struct si_resource *dst, struct si_resource *src)
 {
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index ddaf2af3349..15f3d238ac5 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -119,14 +119,12 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 	ctx->sdma_uploads_in_progress = true;
 	for (unsigned i = 0; i < ctx->num_sdma_uploads; i++) {
 		struct si_sdma_upload *up = &ctx->sdma_uploads[i];
-		struct pipe_box box;
 
 		assert(up->src_offset % 4 == 0 && up->dst_offset % 4 == 0 &&
 		       up->size % 4 == 0);
 
-		u_box_1d(up->src_offset, up->size, &box);
-		ctx->dma_copy(&ctx->b, &up->dst->b.b, 0, up->dst_offset, 0, 0,
-			      &up->src->b.b, 0, &box);
+		si_sdma_copy_buffer(ctx, &up->dst->b.b, &up->src->b.b,
+				    up->dst_offset, up->src_offset, up->size);
 	}
 	ctx->sdma_uploads_in_progress = false;
 	si_unref_sdma_uploads(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index e5f9b29a9ec..3f84725f8f2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -595,7 +595,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	if (sctx->chip_class >= GFX7)
 		cik_init_sdma_functions(sctx);
 	else
-		si_init_dma_functions(sctx);
+		sctx->dma_copy = si_resource_copy_region;
 
 	if (sscreen->debug_flags & DBG(FORCE_SDMA))
 		sctx->b.resource_copy_region = sctx->dma_copy;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e222de1e906..519b0050fd3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1363,14 +1363,14 @@ void si_check_vm_faults(struct si_context *sctx,
 			struct radeon_saved_cs *saved, enum ring_type ring);
 bool si_replace_shader(unsigned num, struct si_shader_binary *binary);
 
-/* si_dma.c */
-void si_init_dma_functions(struct si_context *sctx);
-
 /* si_dma_cs.c */
 void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst,
 			   uint64_t offset);
 void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 			  uint64_t offset, uint64_t size, unsigned clear_value);
+void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
+			 struct pipe_resource *src, uint64_t dst_offset,
+			 uint64_t src_offset, uint64_t size);
 void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
 		       struct si_resource *dst, struct si_resource *src);
 void si_flush_dma_cs(struct si_context *ctx, unsigned flags,
diff --git a/src/gallium/drivers/radeonsi/si_test_dma_perf.c b/src/gallium/drivers/radeonsi/si_test_dma_perf.c
index c796cc164a3..4eec3d12459 100644
--- a/src/gallium/drivers/radeonsi/si_test_dma_perf.c
+++ b/src/gallium/drivers/radeonsi/si_test_dma_perf.c
@@ -191,9 +191,7 @@ void si_test_dma_perf(struct si_screen *sscreen)
 					} else if (test_sdma) {
 						/* SDMA */
 						if (is_copy) {
-							struct pipe_box box;
-							u_box_1d(0, size, &box);
-							sctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, 0, &box);
+							si_sdma_copy_buffer(sctx, dst, src, 0, 0, size);
 						} else {
 							si_sdma_clear_buffer(sctx, dst, 0, size, clear_value);
 						}
diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index 9fc669a6abd..183d5bd5294 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -1513,14 +1513,12 @@ si_texture_create_object(struct pipe_screen *screen,
 
 			/* Copy the staging buffer to the buffer backing the texture. */
 			struct si_context *sctx = (struct si_context*)sscreen->aux_context;
-			struct pipe_box box;
-			u_box_1d(0, buf->b.b.width0, &box);
 
 			assert(tex->surface.dcc_retile_map_offset <= UINT_MAX);
 			simple_mtx_lock(&sscreen->aux_context_lock);
-			sctx->dma_copy(&sctx->b, &tex->buffer.b.b, 0,
-				       tex->surface.dcc_retile_map_offset, 0, 0,
-				       &buf->b.b, 0, &box);
+			si_sdma_copy_buffer(sctx, &tex->buffer.b.b, &buf->b.b,
+					    tex->surface.dcc_retile_map_offset,
+					    0, buf->b.b.width0);
 			sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
 			simple_mtx_unlock(&sscreen->aux_context_lock);
 




More information about the mesa-commit mailing list