[Mesa-dev] [PATCH 1/3] r600g, radeonsi: use a fallback in dma_copy instead of failing

Marek Olšák maraeo at gmail.com
Sat Mar 8 17:24:51 PST 2014


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/r600/evergreen_state.c      | 37 +++++++++-------
 src/gallium/drivers/r600/r600_state.c           | 41 ++++++++++-------
 src/gallium/drivers/radeon/r600_buffer_common.c | 58 +++++++++++--------------
 src/gallium/drivers/radeon/r600_pipe_common.h   | 17 ++++----
 src/gallium/drivers/radeon/r600_texture.c       | 18 +++-----
 src/gallium/drivers/radeonsi/si_state.c         | 19 ++++----
 6 files changed, 97 insertions(+), 93 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index dca7c58..5e57f8d 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3329,13 +3329,13 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 	}
 }
 
-static boolean evergreen_dma_blit(struct pipe_context *ctx,
-				  struct pipe_resource *dst,
-				  unsigned dst_level,
-				  unsigned dst_x, unsigned dst_y, unsigned dst_z,
-				  struct pipe_resource *src,
-				  unsigned src_level,
-				  const struct pipe_box *src_box)
+static void evergreen_dma_blit(struct pipe_context *ctx,
+			       struct pipe_resource *dst,
+			       unsigned dst_level,
+			       unsigned dst_x, unsigned dst_y, unsigned dst_z,
+			       struct pipe_resource *src,
+			       unsigned src_level,
+			       const struct pipe_box *src_box)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
@@ -3345,19 +3345,22 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
 	unsigned src_x, src_y;
 
 	if (rctx->b.rings.dma.cs == NULL) {
-		return FALSE;
+		goto fallback;
 	}
 
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
+		if (dst_x % 4 || src_box->x % 4 || src_box->width % 4)
+			goto fallback;
+
 		evergreen_dma_copy(rctx, dst, src, dst_x, src_box->x, src_box->width);
-		return TRUE;
+		return;
 	}
 
 	if (src->format != dst->format) {
-		return FALSE;
+		goto fallback;
 	}
 	if (rdst->dirty_level_mask != 0) {
-		return FALSE;
+		goto fallback;
 	}
 	if (rsrc->dirty_level_mask) {
 		ctx->flush_resource(ctx, src);
@@ -3383,13 +3386,13 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
 
 	if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
 		/* FIXME evergreen can do partial blit */
-		return FALSE;
+		goto fallback;
 	}
 	/* the x test here are currently useless (because we don't support partial blit)
 	 * but keep them around so we don't forget about those
 	 */
 	if ((src_pitch & 0x7) || (src_box->x & 0x7) || (dst_x & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) {
-		return FALSE;
+		goto fallback;
 	}
 
 	/* 128 bpp surfaces require non_disp_tiling for both
@@ -3400,7 +3403,7 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
 	if ((rctx->b.chip_class == CAYMAN) &&
 	    (src_mode != dst_mode) &&
 	    (util_format_get_blocksize(src->format) >= 16)) {
-		return FALSE;
+		goto fallback;
 	}
 
 	if (src_mode == dst_mode) {
@@ -3423,7 +3426,11 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx,
 					src, src_level, src_x, src_y, src_box->z,
 					copy_height, dst_pitch, bpp);
 	}
-	return TRUE;
+	return;
+
+fallback:
+	ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
+				  src, src_level, src_box);
 }
 
 void evergreen_init_state_functions(struct r600_context *rctx)
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 6d89e6c..a0e6d2d 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2883,13 +2883,13 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
 	return TRUE;
 }
 
-static boolean r600_dma_blit(struct pipe_context *ctx,
-			     struct pipe_resource *dst,
-			     unsigned dst_level,
-			     unsigned dst_x, unsigned dst_y, unsigned dst_z,
-			     struct pipe_resource *src,
-			     unsigned src_level,
-			     const struct pipe_box *src_box)
+static void r600_dma_blit(struct pipe_context *ctx,
+			  struct pipe_resource *dst,
+			  unsigned dst_level,
+			  unsigned dst_x, unsigned dst_y, unsigned dst_z,
+			  struct pipe_resource *src,
+			  unsigned src_level,
+			  const struct pipe_box *src_box)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
@@ -2899,16 +2899,19 @@ static boolean r600_dma_blit(struct pipe_context *ctx,
 	unsigned src_x, src_y;
 
 	if (rctx->b.rings.dma.cs == NULL) {
-		return FALSE;
+		goto fallback;
 	}
 
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
+		if (dst_x % 4 || src_box->x % 4 || src_box->width % 4)
+			goto fallback;
+
 		r600_dma_copy(rctx, dst, src, dst_x, src_box->x, src_box->width);
-		return TRUE;
+		return;
 	}
 
 	if (src->format != dst->format) {
-		return FALSE;
+		goto fallback;
 	}
 
 	src_x = util_format_get_nblocksx(src->format, src_box->x);
@@ -2931,11 +2934,11 @@ static boolean r600_dma_blit(struct pipe_context *ctx,
 
 	if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
 		/* strick requirement on r6xx/r7xx */
-		return FALSE;
+		goto fallback;
 	}
 	/* lot of constraint on alignment this should capture them all */
 	if ((src_pitch & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) {
-		return FALSE;
+		goto fallback;
 	}
 
 	if (src_mode == dst_mode) {
@@ -2955,15 +2958,21 @@ static boolean r600_dma_blit(struct pipe_context *ctx,
 		size = src_box->height * src_pitch;
 		/* must be dw aligned */
 		if ((dst_offset & 0x3) || (src_offset & 0x3) || (size & 0x3)) {
-			return FALSE;
+			goto fallback;
 		}
 		r600_dma_copy(rctx, dst, src, dst_offset, src_offset, size);
 	} else {
-		return r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
+		if (!r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
 					src, src_level, src_x, src_y, src_box->z,
-					copy_height, dst_pitch, bpp);
+					copy_height, dst_pitch, bpp)) {
+			goto fallback;
+		}
 	}
-	return TRUE;
+	return;
+
+fallback:
+	ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
+				  src, src_level, src_box);
 }
 
 void r600_init_state_functions(struct r600_context *rctx)
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 90ca8cb..a7ecfb3 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -190,6 +190,17 @@ static void *r600_buffer_get_transfer(struct pipe_context *ctx,
 	return data;
 }
 
+static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
+				     unsigned dstx, unsigned srcx, unsigned size)
+{
+	bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
+
+	return rctx->screen->has_cp_dma ||
+	       (dword_aligned && (rctx->rings.dma.cs ||
+				  rctx->screen->has_streamout));
+
+}
+
 static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                       struct pipe_resource *resource,
                                       unsigned level,
@@ -233,10 +244,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 	else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
 		 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
 		 !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
-		 (rscreen->has_cp_dma ||
-		  (rscreen->has_streamout &&
-		   /* The buffer range must be aligned to 4 with streamout. */
-		   box->x % 4 == 0 && box->width % 4 == 0))) {
+		 r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
 		assert(usage & PIPE_TRANSFER_WRITE);
 
 		/* Check if mapping this buffer would cause waiting for the GPU. */
@@ -260,10 +268,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 		/* At this point, the buffer is always idle (we checked it above). */
 		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 	}
-	/* Using DMA for larger reads is much faster */
+	/* Using a staging buffer in GTT for larger reads is much faster. */
 	else if ((usage & PIPE_TRANSFER_READ) &&
 		 !(usage & PIPE_TRANSFER_WRITE) &&
-		 (rbuffer->domains == RADEON_DOMAIN_VRAM)) {
+		 rbuffer->domains == RADEON_DOMAIN_VRAM &&
+		 r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
 		unsigned offset;
 		struct r600_resource *staging = NULL;
 
@@ -274,26 +283,16 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 		if (staging) {
 			data += box->x % R600_MAP_BUFFER_ALIGNMENT;
 
-			/* Copy the staging buffer into the original one. */
-			if (rctx->dma_copy(ctx, (struct pipe_resource*)staging, 0,
-						 box->x % R600_MAP_BUFFER_ALIGNMENT,
-						 0, 0, resource, level, box)) {
-				rctx->rings.gfx.flush(rctx, 0);
-				if (rctx->rings.dma.cs)
-					rctx->rings.dma.flush(rctx, 0);
-
-				/* Wait for any offloaded CS flush to complete
-				 * to avoid busy-waiting in the winsys. */
-				rctx->ws->cs_sync_flush(rctx->rings.gfx.cs);
-				if (rctx->rings.dma.cs)
-					rctx->ws->cs_sync_flush(rctx->rings.dma.cs);
-
-				rctx->ws->buffer_wait(staging->buf, RADEON_USAGE_WRITE);
-				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
-								ptransfer, data, staging, offset);
-			} else {
-				pipe_resource_reference((struct pipe_resource**)&staging, NULL);
-			}
+			/* Copy the VRAM buffer to the staging buffer. */
+			rctx->dma_copy(ctx, &staging->b.b, 0,
+				       box->x % R600_MAP_BUFFER_ALIGNMENT,
+				       0, 0, resource, level, box);
+
+			/* Just do the synchronization. The buffer is mapped already. */
+			r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
+
+			return r600_buffer_get_transfer(ctx, resource, level, usage, box,
+							ptransfer, data, staging, offset);
 		}
 	}
 
@@ -329,12 +328,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
 			u_box_1d(soffset, size, &box);
 
 			/* Copy the staging buffer into the original one. */
-			if (!(size % 4) && !(doffset % 4) && !(soffset % 4) &&
-			    rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box)) {
-				/* DONE. */
-			} else {
-				ctx->resource_copy_region(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
-			}
+			rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
 		}
 		pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
 	}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 493a8fc..5e544c5 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -344,15 +344,14 @@ struct r600_common_context {
 	boolean				current_render_cond_cond;
 	boolean				predicate_drawing;
 
-	/* Copy one resource to another using async DMA.
-	 * False is returned if the copy couldn't be done. */
-	boolean (*dma_copy)(struct pipe_context *ctx,
-			    struct pipe_resource *dst,
-			    unsigned dst_level,
-			    unsigned dst_x, unsigned dst_y, unsigned dst_z,
-			    struct pipe_resource *src,
-			    unsigned src_level,
-			    const struct pipe_box *src_box);
+	/* Copy one resource to another, using async DMA when possible and falling back to resource_copy_region otherwise. */
+	void (*dma_copy)(struct pipe_context *ctx,
+			 struct pipe_resource *dst,
+			 unsigned dst_level,
+			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
+			 struct pipe_resource *src,
+			 unsigned src_level,
+			 const struct pipe_box *src_box);
 
 	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
 			     unsigned offset, unsigned size, unsigned value);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index e3b918e..5c32a84 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -80,12 +80,8 @@ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_t
 		return;
 	}
 
-	if (!rctx->dma_copy(ctx, dst, 0, 0, 0, 0,
-			      src, transfer->level,
-			      &transfer->box)) {
-		ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0,
-					  src, transfer->level, &transfer->box);
-	}
+	rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
+		       &transfer->box);
 }
 
 /* Copy from a transfer's staging texture to a full GPU one. */
@@ -106,13 +102,9 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600
 		return;
 	}
 
-	if (!rctx->dma_copy(ctx, dst, transfer->level,
-			      transfer->box.x, transfer->box.y, transfer->box.z,
-			      src, 0, &sbox)) {
-		ctx->resource_copy_region(ctx, dst, transfer->level,
-					  transfer->box.x, transfer->box.y, transfer->box.z,
-					  src, 0, &sbox);
-	}
+	rctx->dma_copy(ctx, dst, transfer->level,
+		       transfer->box.x, transfer->box.y, transfer->box.z,
+		       src, 0, &sbox);
 }
 
 static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level,
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 4094421..9210547 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2919,16 +2919,19 @@ static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
 	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
 }
 
-static boolean si_dma_copy(struct pipe_context *ctx,
-			   struct pipe_resource *dst,
-			   unsigned dst_level,
-			   unsigned dst_x, unsigned dst_y, unsigned dst_z,
-			   struct pipe_resource *src,
-			   unsigned src_level,
-			   const struct pipe_box *src_box)
+static void si_dma_copy(struct pipe_context *ctx,
+			struct pipe_resource *dst,
+			unsigned dst_level,
+			unsigned dst_x, unsigned dst_y, unsigned dst_z,
+			struct pipe_resource *src,
+			unsigned src_level,
+			const struct pipe_box *src_box)
 {
 	/* XXX implement this or share evergreen_dma_blit with r600g */
-	return FALSE;
+
+	/* Fallback: */
+	ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
+				  src, src_level, src_box);
 }
 
 static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
-- 
1.8.3.2



More information about the mesa-dev mailing list