[Mesa-dev] [PATCH 11/14] gallium/radeon: flush if DMA IB memory usage is too high

Marek Olšák maraeo at gmail.com
Wed May 4 23:43:40 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

This prevents IB rejections due to insane memory usage from
many consecutive texture uploads.
---
 src/gallium/drivers/r600/evergreen_hw_context.c |  2 +-
 src/gallium/drivers/r600/evergreen_state.c      |  2 +-
 src/gallium/drivers/r600/r600_hw_context.c      |  2 +-
 src/gallium/drivers/r600/r600_state.c           |  2 +-
 src/gallium/drivers/radeon/r600_pipe_common.c   | 27 +++++++++++++++++++++----
 src/gallium/drivers/radeon/r600_pipe_common.h   |  3 ++-
 src/gallium/drivers/radeonsi/cik_sdma.c         |  8 ++++----
 src/gallium/drivers/radeonsi/si_dma.c           |  4 ++--
 8 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index c2dba8c..cd07319 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -60,7 +60,7 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
 	}
 	ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE);
 
-	r600_need_dma_space(&rctx->b, ncopy * 5);
+	r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc);
 	for (i = 0; i < ncopy; i++) {
 		csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
 		/* emit reloc before writing cs so that cs is always in consistent state */
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index acf60c6..62152c0 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3442,7 +3442,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 
 	size = (copy_height * pitch) / 4;
 	ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE);
-	r600_need_dma_space(&rctx->b, ncopy * 9);
+	r600_need_dma_space(&rctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);
 
 	for (i = 0; i < ncopy; i++) {
 		cheight = copy_height;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index fa1028b..857da7f 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -467,7 +467,7 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
 	size >>= 2; /* convert to dwords */
 	ncopy = (size / R600_DMA_COPY_MAX_SIZE_DW) + !!(size % R600_DMA_COPY_MAX_SIZE_DW);
 
-	r600_need_dma_space(&rctx->b, ncopy * 5);
+	r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc);
 	for (i = 0; i < ncopy; i++) {
 		csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
 		/* emit reloc before writing cs so that cs is always in consistent state */
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 715c6f1..ab0cf5c 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2918,7 +2918,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
 	 */
 	cheight = ((R600_DMA_COPY_MAX_SIZE_DW * 4) / pitch) & 0xfffffff8;
 	ncopy = (copy_height / cheight) + !!(copy_height % cheight);
-	r600_need_dma_space(&rctx->b, ncopy * 7);
+	r600_need_dma_space(&rctx->b, ncopy * 7, &rdst->resource, &rsrc->resource);
 
 	for (i = 0; i < ncopy; i++) {
 		cheight = cheight > copy_height ? copy_height : cheight;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 02e17ba..eac7812 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -136,14 +136,33 @@ void r600_draw_rectangle(struct blitter_context *blitter,
 	pipe_resource_reference(&buf, NULL);
 }
 
-void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw)
+void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+                         struct r600_resource *dst, struct r600_resource *src)
 {
+	uint64_t vram = 0, gtt = 0;
+
+	if (dst) {
+		if (dst->domains & RADEON_DOMAIN_VRAM)
+			vram += dst->buf->size;
+		else if (dst->domains & RADEON_DOMAIN_GTT)
+			gtt += dst->buf->size;
+	}
+	if (src) {
+		if (src->domains & RADEON_DOMAIN_VRAM)
+			vram += src->buf->size;
+		else if (src->domains & RADEON_DOMAIN_GTT)
+			gtt += src->buf->size;
+	}
+
 	/* Flush the GFX IB if it's not empty. */
 	if (ctx->gfx.cs->cdw > ctx->initial_gfx_cs_size)
 		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 
-	/* Flush if there's not enough space. */
-	if ((num_dw + ctx->dma.cs->cdw) > ctx->dma.cs->max_dw) {
+	/* Flush if there's not enough space, or if the memory usage per IB
+	 * is too large.
+	 */
+	if ((num_dw + ctx->dma.cs->cdw) > ctx->dma.cs->max_dw ||
+	    !ctx->ws->cs_memory_below_limit(ctx->dma.cs, vram, gtt)) {
 		ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 		assert((num_dw + ctx->dma.cs->cdw) <= ctx->dma.cs->max_dw);
 	}
@@ -157,7 +176,7 @@ void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
 	/* done at the end of DMA calls, so increment this. */
 	rctx->num_dma_calls++;
 
-	r600_need_dma_space(rctx, 1);
+	r600_need_dma_space(rctx, 1, NULL, NULL);
 
 	if (cs->cdw == 0) /* empty queue */
 		return;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index a6afc43..53baa61 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -597,7 +597,8 @@ void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_re
 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
 						  const struct pipe_resource *templ);
 const char *r600_get_llvm_processor_name(enum radeon_family family);
-void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw);
+void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+			 struct r600_resource *dst, struct r600_resource *src);
 void r600_dma_emit_wait_idle(struct r600_common_context *rctx);
 
 /* r600_gpu_load.c */
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index ea6a122..29d9a00 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -47,7 +47,7 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx,
 	src_offset += r600_resource(src)->gpu_address;
 
 	ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
-	r600_need_dma_space(&ctx->b, ncopy * 7);
+	r600_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc);
 
 	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rsrc, RADEON_USAGE_READ,
 			      RADEON_PRIO_SDMA_BUFFER);
@@ -212,7 +212,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 	      srcy + copy_height != (1 << 14)))) {
 		struct radeon_winsys_cs *cs = sctx->b.dma.cs;
 
-		r600_need_dma_space(&sctx->b, 13);
+		r600_need_dma_space(&sctx->b, 13, &rdst->resource, &rsrc->resource);
 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.dma, &rsrc->resource,
 					  RADEON_USAGE_READ,
 					  RADEON_PRIO_SDMA_TEXTURE);
@@ -380,7 +380,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 		    copy_depth <= (1 << 11)) {
 			struct radeon_winsys_cs *cs = sctx->b.dma.cs;
 
-			r600_need_dma_space(&sctx->b, 14);
+			r600_need_dma_space(&sctx->b, 14, &rdst->resource, &rsrc->resource);
 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.dma, &rsrc->resource,
 						  RADEON_USAGE_READ,
 						  RADEON_PRIO_SDMA_TEXTURE);
@@ -482,7 +482,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 		      dstx + copy_width != (1 << 14)))) {
 			struct radeon_winsys_cs *cs = sctx->b.dma.cs;
 
-			r600_need_dma_space(&sctx->b, 15);
+			r600_need_dma_space(&sctx->b, 15, &rdst->resource, &rsrc->resource);
 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.dma, &rsrc->resource,
 						  RADEON_USAGE_READ,
 						  RADEON_PRIO_SDMA_TEXTURE);
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 03321f5..25fb4b7 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -64,7 +64,7 @@ static void si_dma_copy_buffer(struct si_context *ctx,
 	}
 	ncopy = (size / max_csize) + !!(size % max_csize);
 
-	r600_need_dma_space(&ctx->b, ncopy * 5);
+	r600_need_dma_space(&ctx->b, ncopy * 5, rdst, rsrc);
 
 	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rsrc, RADEON_USAGE_READ,
 			      RADEON_PRIO_SDMA_BUFFER);
@@ -161,7 +161,7 @@ static void si_dma_copy_tile(struct si_context *ctx,
 	mt = G_009910_MICRO_TILE_MODE(tile_mode);
 	size = (copy_height * pitch) / 4;
 	ncopy = (size / SI_DMA_COPY_MAX_SIZE_DW) + !!(size % SI_DMA_COPY_MAX_SIZE_DW);
-	r600_need_dma_space(&ctx->b, ncopy * 9);
+	r600_need_dma_space(&ctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);
 
 	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, &rsrc->resource,
 			      RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
-- 
2.7.4



More information about the mesa-dev mailing list