[Mesa-dev] [PATCH 18/18] radeonsi: use R600_RESOURCE_FLAG_UNMAPPABLE where it's desirable

Thu Feb 16 12:53:10 UTC 2017

From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeon/r600_texture.c       | 11 +++++--
 src/gallium/drivers/radeonsi/si_compute.c       |  6 ++--
 src/gallium/drivers/radeonsi/si_cp_dma.c        |  6 ++--
 src/gallium/drivers/radeonsi/si_pipe.c          | 12 +++++---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 41 ++++++++++++++++---------
 5 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 47aa8b1..0865d35 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -756,21 +756,23 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
 
 	assert(rtex->cmask.size == 0);
 
 	if (rscreen->chip_class >= SI) {
 		si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
 	} else {
 		r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
 	}
 
 	rtex->cmask_buffer = (struct r600_resource *)
-		r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
+		r600_aligned_buffer_create(&rscreen->b,
+					   R600_RESOURCE_FLAG_UNMAPPABLE,
+					   PIPE_USAGE_DEFAULT,
 					   rtex->cmask.size,
 					   rtex->cmask.alignment);
 	if (rtex->cmask_buffer == NULL) {
 		rtex->cmask.size = 0;
 		return;
 	}
 
 	/* update colorbuffer state bits */
 	rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
 
@@ -867,21 +869,23 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
 		clear_value = 0x0000030F;
 	} else {
 		r600_texture_get_htile_size(rscreen, rtex);
 		clear_value = 0;
 	}
 
 	if (!rtex->surface.htile_size)
 		return;
 
 	rtex->htile_buffer = (struct r600_resource*)
-		r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
+		r600_aligned_buffer_create(&rscreen->b,
+					   R600_RESOURCE_FLAG_UNMAPPABLE,
+					   PIPE_USAGE_DEFAULT,
 					   rtex->surface.htile_size,
 					   rtex->surface.htile_alignment);
 	if (rtex->htile_buffer == NULL) {
 		/* this is not a fatal error as we can still keep rendering
 		 * without htile buffer */
 		R600_ERR("Failed to create buffer object for htile buffer.\n");
 	} else {
 		r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
 					 0, rtex->surface.htile_size,
 					 clear_value);
@@ -2099,21 +2103,22 @@ static void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
 	r600_texture_discard_cmask(rctx->screen, tex);
 
 	/* Get a DCC buffer. */
 	if (tex->last_dcc_separate_buffer) {
 		assert(tex->dcc_gather_statistics);
 		assert(!tex->dcc_separate_buffer);
 		tex->dcc_separate_buffer = tex->last_dcc_separate_buffer;
 		tex->last_dcc_separate_buffer = NULL;
 	} else {
 		tex->dcc_separate_buffer = (struct r600_resource*)
-			r600_aligned_buffer_create(rctx->b.screen, 0,
+			r600_aligned_buffer_create(rctx->b.screen,
+						   R600_RESOURCE_FLAG_UNMAPPABLE,
 						   PIPE_USAGE_DEFAULT,
 						   tex->surface.dcc_size,
 						   tex->surface.dcc_alignment);
 		if (!tex->dcc_separate_buffer)
 			return;
 	}
 
 	/* dcc_offset is the absolute GPUVM address. */
 	tex->dcc_offset = tex->dcc_separate_buffer->gpu_address;
 
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 88d72c1..f4efb0d 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -282,22 +282,24 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
 	uint64_t scratch_bo_size, scratch_needed;
 	scratch_bo_size = 0;
 	scratch_needed = config->scratch_bytes_per_wave * sctx->scratch_waves;
 	if (sctx->compute_scratch_buffer)
 		scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;
 
 	if (scratch_bo_size < scratch_needed) {
 		r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
 
 		sctx->compute_scratch_buffer = (struct r600_resource*)
-			pipe_buffer_create(&sctx->screen->b.b, 0,
-					   PIPE_USAGE_DEFAULT, scratch_needed);
+			r600_aligned_buffer_create(&sctx->screen->b.b,
+						   R600_RESOURCE_FLAG_UNMAPPABLE,
+						   PIPE_USAGE_DEFAULT,
+						   scratch_needed, 256);
 
 		if (!sctx->compute_scratch_buffer)
 			return false;
 	}
 
 	if (sctx->compute_scratch_buffer != shader->scratch_bo && scratch_needed) {
 		uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
 
 		si_shader_apply_scratch_relocs(sctx, shader, config, scratch_va);
 
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index ea999d9..1be7586 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -269,22 +269,24 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size,
 
 	assert(size < SI_CPDMA_ALIGNMENT);
 
 	/* Use the scratch buffer as the dummy buffer. The 3D engine should be
 	 * idle at this point.
 	 */
 	if (!sctx->scratch_buffer ||
 	    sctx->scratch_buffer->b.b.width0 < scratch_size) {
 		r600_resource_reference(&sctx->scratch_buffer, NULL);
 		sctx->scratch_buffer = (struct r600_resource*)
-			pipe_buffer_create(&sctx->screen->b.b, 0,
-					   PIPE_USAGE_DEFAULT, scratch_size);
+			r600_aligned_buffer_create(&sctx->screen->b.b,
+						   R600_RESOURCE_FLAG_UNMAPPABLE,
+						   PIPE_USAGE_DEFAULT,
+						   scratch_size, 256);
 		if (!sctx->scratch_buffer)
 			return;
 
 		si_mark_atom_dirty(sctx, &sctx->scratch_state);
 	}
 
 	si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b,
 			  &sctx->scratch_buffer->b.b, size, size, user_flags,
 			  is_first, &dma_flags);
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index a947bad..843c6b3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -197,22 +197,23 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
 		if (ws->cs_add_const_preamble_ib) {
 			sctx->ce_preamble_ib =
 			           ws->cs_add_const_preamble_ib(sctx->b.gfx.cs);
 
 			if (!sctx->ce_preamble_ib)
 				goto fail;
 		}
 
 		sctx->ce_suballocator =
-				u_suballocator_create(&sctx->b.b, 1024 * 1024,
-						      0, PIPE_USAGE_DEFAULT, 0, false);
+			u_suballocator_create(&sctx->b.b, 1024 * 1024, 0,
+					      PIPE_USAGE_DEFAULT,
+					      R600_RESOURCE_FLAG_UNMAPPABLE, false);
 		if (!sctx->ce_suballocator)
 			goto fail;
 	}
 
 	sctx->b.gfx.flush = si_context_gfx_flush;
 
 	/* Border colors. */
 	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
 					  sizeof(*sctx->border_color_table));
 	if (!sctx->border_color_table)
@@ -250,22 +251,25 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	sctx->blitter->draw_rectangle = r600_draw_rectangle;
 
 	sctx->sample_mask.sample_mask = 0xffff;
 
 	/* these must be last */
 	si_begin_new_cs(sctx);
 
 	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
 	 * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
 	if (sctx->b.chip_class == CIK) {
-		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
-								 PIPE_USAGE_DEFAULT, 16);
+		sctx->null_const_buf.buffer =
+			r600_aligned_buffer_create(screen,
+						   R600_RESOURCE_FLAG_UNMAPPABLE,
+						   PIPE_USAGE_DEFAULT, 16,
+						   sctx->screen->b.info.tcc_cache_line_size);
 		if (!sctx->null_const_buf.buffer)
 			goto fail;
 		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;
 
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
 			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
 				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
 							      &sctx->null_const_buf);
 			}
 		}
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 179176c..4a81b56 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2072,32 +2072,36 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 			    sctx->esgs_ring->width0 < esgs_ring_size);
 	bool update_gsvs = gsvs_ring_size &&
 			   (!sctx->gsvs_ring ||
 			    sctx->gsvs_ring->width0 < gsvs_ring_size);
 
 	if (!update_esgs && !update_gsvs)
 		return true;
 
 	if (update_esgs) {
 		pipe_resource_reference(&sctx->esgs_ring, NULL);
-		sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, 0,
-						     PIPE_USAGE_DEFAULT,
-						     esgs_ring_size);
+		sctx->esgs_ring =
+			r600_aligned_buffer_create(sctx->b.b.screen,
+						   R600_RESOURCE_FLAG_UNMAPPABLE,
+						   PIPE_USAGE_DEFAULT,
+						   esgs_ring_size, alignment);
 		if (!sctx->esgs_ring)
 			return false;
 	}
 
 	if (update_gsvs) {
 		pipe_resource_reference(&sctx->gsvs_ring, NULL);
-		sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, 0,
-						     PIPE_USAGE_DEFAULT,
-						     gsvs_ring_size);
+		sctx->gsvs_ring =
+			r600_aligned_buffer_create(sctx->b.b.screen,
+						   R600_RESOURCE_FLAG_UNMAPPABLE,
+						   PIPE_USAGE_DEFAULT,
+						   gsvs_ring_size, alignment);
 		if (!sctx->gsvs_ring)
 			return false;
 	}
 
 	/* Create the "init_config_gs_rings" state. */
 	pm4 = CALLOC_STRUCT(si_pm4_state);
 	if (!pm4)
 		return false;
 
 	if (sctx->b.chip_class >= CIK) {
@@ -2220,22 +2224,24 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
 		sctx->scratch_waves;
 	unsigned spi_tmpring_size;
 	int r;
 
 	if (scratch_needed_size > 0) {
 		if (scratch_needed_size > current_scratch_buffer_size) {
 			/* Create a bigger scratch buffer */
 			r600_resource_reference(&sctx->scratch_buffer, NULL);
 
 			sctx->scratch_buffer = (struct r600_resource*)
-					pipe_buffer_create(&sctx->screen->b.b, 0,
-	                                PIPE_USAGE_DEFAULT, scratch_needed_size);
+				r600_aligned_buffer_create(&sctx->screen->b.b,
+							   R600_RESOURCE_FLAG_UNMAPPABLE,
+							   PIPE_USAGE_DEFAULT,
+							   scratch_needed_size, 256);
 			if (!sctx->scratch_buffer)
 				return false;
 
 			si_mark_atom_dirty(sctx, &sctx->scratch_state);
 			r600_context_add_resource_size(&sctx->b.b,
 						       &sctx->scratch_buffer->b.b);
 		}
 
 		/* Update the shaders, so they are using the latest scratch.  The
 		 * scratch buffer may have been changed since these shaders were
@@ -2325,32 +2331,37 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 	case CIK:
 		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
 		break;
 	case VI:
 	default:
 		max_offchip_buffers = MIN2(max_offchip_buffers, 512);
 		break;
 	}
 
 	assert(!sctx->tf_ring);
-	sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, 0,
-					   PIPE_USAGE_DEFAULT,
-					   32768 * sctx->screen->b.info.max_se);
+	sctx->tf_ring = r600_aligned_buffer_create(sctx->b.b.screen,
+						   R600_RESOURCE_FLAG_UNMAPPABLE,
+						   PIPE_USAGE_DEFAULT,
+						   32768 * sctx->screen->b.info.max_se,
+						   256);
 	if (!sctx->tf_ring)
 		return;
 
 	assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
 
-	sctx->tess_offchip_ring = pipe_buffer_create(sctx->b.b.screen, 0,
-	                                             PIPE_USAGE_DEFAULT,
-	                                             max_offchip_buffers *
-	                                             sctx->screen->tess_offchip_block_dw_size * 4);
+	sctx->tess_offchip_ring =
+		r600_aligned_buffer_create(sctx->b.b.screen,
+					   R600_RESOURCE_FLAG_UNMAPPABLE,
+					   PIPE_USAGE_DEFAULT,
+					   max_offchip_buffers *
+					   sctx->screen->tess_offchip_block_dw_size * 4,
+					   256);
 	if (!sctx->tess_offchip_ring)
 		return;
 
 	si_init_config_add_vgt_flush(sctx);
 
 	/* Append these registers to the init config state. */
 	if (sctx->b.chip_class >= CIK) {
 		if (sctx->b.chip_class >= VI)
 			--max_offchip_buffers;
 
-- 
2.7.4