[Mesa-dev] [PATCH 15/18] r600g: move the low-level buffer functions for multiple rings to drivers/radeon

Wed Sep 25 18:35:35 PDT 2013

From: Marek Olšák <marek.olsak at amd.com>

Also slightly optimize r600_buffer_map_sync_with_rings.
---
 src/gallium/drivers/r600/r600_asm.c           |  2 +-
 src/gallium/drivers/r600/r600_blit.c          |  2 +-
 src/gallium/drivers/r600/r600_buffer.c        |  6 +--
 src/gallium/drivers/r600/r600_hw_context.c    |  4 +-
 src/gallium/drivers/r600/r600_pipe.c          | 69 +--------------------------
 src/gallium/drivers/r600/r600_pipe.h          |  6 ---
 src/gallium/drivers/r600/r600_query.c         |  8 ++--
 src/gallium/drivers/r600/r600_shader.c        |  2 +-
 src/gallium/drivers/r600/r600_texture.c       |  4 +-
 src/gallium/drivers/radeon/r600_pipe_common.c | 66 +++++++++++++++++++++++++
 src/gallium/drivers/radeon/r600_pipe_common.h |  6 +++
 11 files changed, 87 insertions(+), 88 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 581fd0e..3cd14fc 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2415,7 +2415,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 		return NULL;
 	}
 
-	bytecode = r600_buffer_mmap_sync_with_rings(rctx, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+	bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
 	bytecode += shader->offset / 4;
 
 	if (R600_BIG_ENDIAN) {
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index d240c29..20faabd 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -649,7 +649,7 @@ static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *ds
 		/* Flush again in case the 3D engine has been prefetching the resource. */
 		r600_flag_resource_cache_flush(rctx, dst);
 	} else {
-		uint32_t *map = r600_buffer_mmap_sync_with_rings(rctx, r600_resource(dst),
+		uint32_t *map = r600_buffer_map_sync_with_rings(&rctx->b, r600_resource(dst),
 								 PIPE_TRANSFER_WRITE);
 		size /= 4;
 		for (unsigned i = 0; i < size; i++)
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 7fdd2ad..8fe78c3 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -112,7 +112,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 		assert(usage & PIPE_TRANSFER_WRITE);
 
 		/* Check if mapping this buffer would cause waiting for the GPU. */
-		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
+		if (r600_rings_is_buffer_referenced(&rctx->b, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
 		    rctx->b.ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
 			unsigned i, mask;
 
@@ -158,7 +158,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 		assert(usage & PIPE_TRANSFER_WRITE);
 
 		/* Check if mapping this buffer would cause waiting for the GPU. */
-		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
+		if (r600_rings_is_buffer_referenced(&rctx->b, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
 		    rctx->b.ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
 			/* Do a wait-free write-only transfer using a temporary buffer. */
 			unsigned offset;
@@ -176,7 +176,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 	}
 
 	/* mmap and synchronize with rings */
-	data = r600_buffer_mmap_sync_with_rings(rctx, rbuffer, usage);
+	data = r600_buffer_map_sync_with_rings(&rctx->b, rbuffer, usage);
 	if (!data) {
 		return NULL;
 	}
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 3714cd7..d8ae30c 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -75,7 +75,7 @@ void r600_get_backend_mask(struct r600_context *ctx)
 	va = r600_resource_va(&ctx->screen->b.b, (void*)buffer);
 
 	/* initialize buffer with zeroes */
-	results = r600_buffer_mmap_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
+	results = r600_buffer_map_sync_with_rings(&ctx->b, buffer, PIPE_TRANSFER_WRITE);
 	if (results) {
 		memset(results, 0, ctx->max_db * 4 * 4);
 		ctx->b.ws->buffer_unmap(buffer->cs_buf);
@@ -90,7 +90,7 @@ void r600_get_backend_mask(struct r600_context *ctx)
 		cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, buffer, RADEON_USAGE_WRITE);
 
 		/* analyze results */
-		results = r600_buffer_mmap_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
+		results = r600_buffer_map_sync_with_rings(&ctx->b, buffer, PIPE_TRANSFER_READ);
 		if (results) {
 			for(i = 0; i < ctx->max_db; i++) {
 				/* at least highest bit will be set if backend is used */
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 87581b4..8ee9487 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -84,7 +84,7 @@ static struct r600_fence *r600_create_fence(struct r600_context *rctx)
 			R600_ERR("r600: failed to create bo for fence objects\n");
 			goto out;
 		}
-		rscreen->fences.data = r600_buffer_mmap_sync_with_rings(rctx, rscreen->fences.bo, PIPE_TRANSFER_READ_WRITE);
+		rscreen->fences.data = r600_buffer_map_sync_with_rings(&rctx->b, rscreen->fences.bo, PIPE_TRANSFER_READ_WRITE);
 	}
 
 	if (!LIST_IS_EMPTY(&rscreen->fences.pool)) {
@@ -213,73 +213,6 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags)
 	rctx->b.rings.dma.flushing = false;
 }
 
-boolean r600_rings_is_buffer_referenced(struct r600_context *ctx,
-					struct radeon_winsys_cs_handle *buf,
-					enum radeon_bo_usage usage)
-{
-	if (ctx->b.ws->cs_is_buffer_referenced(ctx->b.rings.gfx.cs, buf, usage)) {
-		return TRUE;
-	}
-	if (ctx->b.rings.dma.cs) {
-		if (ctx->b.ws->cs_is_buffer_referenced(ctx->b.rings.dma.cs, buf, usage)) {
-			return TRUE;
-		}
-	}
-	return FALSE;
-}
-
-void *r600_buffer_mmap_sync_with_rings(struct r600_context *ctx,
-					struct r600_resource *resource,
-					unsigned usage)
-{
-	enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
-	unsigned flags = 0;
-	bool sync_flush = TRUE;
-
-	if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
-		return ctx->b.ws->buffer_map(resource->cs_buf, NULL, usage);
-	}
-
-	if (!(usage & PIPE_TRANSFER_WRITE)) {
-		/* have to wait for pending read */
-		rusage = RADEON_USAGE_WRITE;
-	}
-	if (usage & PIPE_TRANSFER_DONTBLOCK) {
-		flags |= RADEON_FLUSH_ASYNC;
-	}
-
-	if (ctx->b.ws->cs_is_buffer_referenced(ctx->b.rings.gfx.cs, resource->cs_buf, rusage) && ctx->b.rings.gfx.cs->cdw) {
-		ctx->b.rings.gfx.flush(ctx, flags);
-		if (usage & PIPE_TRANSFER_DONTBLOCK) {
-			return NULL;
-		}
-	}
-	if (ctx->b.rings.dma.cs) {
-		if (ctx->b.ws->cs_is_buffer_referenced(ctx->b.rings.dma.cs, resource->cs_buf, rusage) && ctx->b.rings.dma.cs->cdw) {
-			ctx->b.rings.dma.flush(ctx, flags);
-			if (usage & PIPE_TRANSFER_DONTBLOCK) {
-				return NULL;
-			}
-		}
-	}
-
-	if (usage & PIPE_TRANSFER_DONTBLOCK) {
-		if (ctx->b.ws->buffer_is_busy(resource->buf, rusage)) {
-			return NULL;
-		}
-	}
-	if (sync_flush) {
-		/* Try to avoid busy-waiting in radeon_bo_wait. */
-		ctx->b.ws->cs_sync_flush(ctx->b.rings.gfx.cs);
-		if (ctx->b.rings.dma.cs) {
-			ctx->b.ws->cs_sync_flush(ctx->b.rings.dma.cs);
-		}
-	}
-
-	/* at this point everything is synchronized */
-	return ctx->b.ws->buffer_map(resource->cs_buf, NULL, usage);
-}
-
 static void r600_flush_from_winsys(void *ctx, unsigned flags)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 80a96ca..dc8274b 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -637,12 +637,6 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
 					 unsigned alignment);
 
 /* r600_pipe.c */
-boolean r600_rings_is_buffer_referenced(struct r600_context *ctx,
-					struct radeon_winsys_cs_handle *buf,
-					enum radeon_bo_usage usage);
-void *r600_buffer_mmap_sync_with_rings(struct r600_context *ctx,
-					struct r600_resource *resource,
-					unsigned usage);
 const char * r600_llvm_gpu_string(enum radeon_family family);
 
 
diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
index 457c9ad..b9ff6a6 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -62,7 +62,7 @@ static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, uns
 	switch (type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
+		results = r600_buffer_map_sync_with_rings(&ctx->b, buf, PIPE_TRANSFER_WRITE);
 		memset(results, 0, buf_size);
 
 		/* Set top bits for unused backends. */
@@ -86,7 +86,7 @@ static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, uns
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
 	case PIPE_QUERY_PIPELINE_STATISTICS:
-		results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
+		results = r600_buffer_map_sync_with_rings(&ctx->b, buf, PIPE_TRANSFER_WRITE);
 		memset(results, 0, buf_size);
 		ctx->b.ws->buffer_unmap(buf->cs_buf);
 		break;
@@ -415,7 +415,7 @@ static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
 	}
 
 	/* Obtain a new buffer if the current one can't be mapped without a stall. */
-	if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
+	if (r600_rings_is_buffer_referenced(&rctx->b, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
 	    rctx->b.ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
 		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
 		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
@@ -496,7 +496,7 @@ static boolean r600_get_query_buffer_result(struct r600_context *ctx,
 		return TRUE;
 	}
 
-	map = r600_buffer_mmap_sync_with_rings(ctx, qbuf->buf,
+	map = r600_buffer_map_sync_with_rings(&ctx->b, qbuf->buf,
 						PIPE_TRANSFER_READ |
 						(wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
 	if (!map)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 71818c7..80cdcd5 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -164,7 +164,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 		if (shader->bo == NULL) {
 			return -ENOMEM;
 		}
-		ptr = r600_buffer_mmap_sync_with_rings(rctx, shader->bo, PIPE_TRANSFER_WRITE);
+		ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
 		if (R600_BIG_ENDIAN) {
 			for (i = 0; i < shader->shader.bc.ndw; ++i) {
 				ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]);
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 481c654..d505d6b 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -791,7 +791,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
 
 	/* Use a staging texture for uploads if the underlying BO is busy. */
 	if (!(usage & PIPE_TRANSFER_READ) &&
-	    (r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
+	    (r600_rings_is_buffer_referenced(&rctx->b, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
 	     rctx->b.ws->buffer_is_busy(rtex->resource.buf, RADEON_USAGE_READWRITE))) {
 		use_staging_texture = TRUE;
 	}
@@ -898,7 +898,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
 		buf = &rtex->resource;
 	}
 
-	if (!(map = r600_buffer_mmap_sync_with_rings(rctx, buf, usage))) {
+	if (!(map = r600_buffer_map_sync_with_rings(&rctx->b, buf, usage))) {
 		pipe_resource_reference((struct pipe_resource**)&trans->staging, NULL);
 		FREE(trans);
 		return NULL;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 0648455..8f0798a 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -271,3 +271,69 @@ void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_re
 	rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
 	pipe_mutex_unlock(rscreen->aux_context_lock);
 }
+
+boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
+					struct radeon_winsys_cs_handle *buf,
+					enum radeon_bo_usage usage)
+{
+	if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) {
+		return TRUE;
+	}
+	if (ctx->rings.dma.cs &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) {
+		return TRUE;
+	}
+	return FALSE;
+}
+
+void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
+                                      struct r600_resource *resource,
+                                      unsigned usage)
+{
+	enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
+
+	if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
+		return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
+	}
+
+	if (!(usage & PIPE_TRANSFER_WRITE)) {
+		/* have to wait for the last write */
+		rusage = RADEON_USAGE_WRITE;
+	}
+
+	if (ctx->rings.gfx.cs->cdw &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs,
+					     resource->cs_buf, rusage)) {
+		if (usage & PIPE_TRANSFER_DONTBLOCK) {
+			ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
+			return NULL;
+		} else {
+			ctx->rings.gfx.flush(ctx, 0);
+		}
+	}
+	if (ctx->rings.dma.cs &&
+	    ctx->rings.dma.cs->cdw &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs,
+					     resource->cs_buf, rusage)) {
+		if (usage & PIPE_TRANSFER_DONTBLOCK) {
+			ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
+			return NULL;
+		} else {
+			ctx->rings.dma.flush(ctx, 0);
+		}
+	}
+
+	if (ctx->ws->buffer_is_busy(resource->buf, rusage)) {
+		if (usage & PIPE_TRANSFER_DONTBLOCK) {
+			return NULL;
+		} else {
+			/* We will be waiting for the GPU. Wait for any offloaded
+			 * CS flush to complete to avoid busy-waiting in the winsys. */
+			ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
+			if (ctx->rings.dma.cs)
+				ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
+		}
+	}
+
+	return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
+}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index e5e79b0..84fdff1 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -257,6 +257,12 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
 			  const struct tgsi_token *tokens);
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
 			      unsigned offset, unsigned size, unsigned value);
+boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
+					struct radeon_winsys_cs_handle *buf,
+					enum radeon_bo_usage usage);
+void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
+                                      struct r600_resource *resource,
+                                      unsigned usage);
 
 /* r600_streamout.c */
 void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
-- 
1.8.1.2