[Mesa-dev] [PATCH 18/18] radeonsi: use R600_RESOURCE_FLAG_UNMAPPABLE where it's desirable

Thu Feb 16 15:23:52 UTC 2017

Some cool improvements all around. I have some questions on patches 9, 12,
and 15; the rest are

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

On 16.02.2017 13:53, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
>  src/gallium/drivers/radeon/r600_texture.c       | 11 +++++--
>  src/gallium/drivers/radeonsi/si_compute.c       |  6 ++--
>  src/gallium/drivers/radeonsi/si_cp_dma.c        |  6 ++--
>  src/gallium/drivers/radeonsi/si_pipe.c          | 12 +++++---
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 41 ++++++++++++++++---------
>  5 files changed, 50 insertions(+), 26 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 47aa8b1..0865d35 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -756,21 +756,23 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
>
>  	assert(rtex->cmask.size == 0);
>
>  	if (rscreen->chip_class >= SI) {
>  		si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
>  	} else {
>  		r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
>  	}
>
>  	rtex->cmask_buffer = (struct r600_resource *)
> -		r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
> +		r600_aligned_buffer_create(&rscreen->b,
> +					   R600_RESOURCE_FLAG_UNMAPPABLE,
> +					   PIPE_USAGE_DEFAULT,
>  					   rtex->cmask.size,
>  					   rtex->cmask.alignment);
>  	if (rtex->cmask_buffer == NULL) {
>  		rtex->cmask.size = 0;
>  		return;
>  	}
>
>  	/* update colorbuffer state bits */
>  	rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
>
> @@ -867,21 +869,23 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
>  		clear_value = 0x0000030F;
>  	} else {
>  		r600_texture_get_htile_size(rscreen, rtex);
>  		clear_value = 0;
>  	}
>
>  	if (!rtex->surface.htile_size)
>  		return;
>
>  	rtex->htile_buffer = (struct r600_resource*)
> -		r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
> +		r600_aligned_buffer_create(&rscreen->b,
> +					   R600_RESOURCE_FLAG_UNMAPPABLE,
> +					   PIPE_USAGE_DEFAULT,
>  					   rtex->surface.htile_size,
>  					   rtex->surface.htile_alignment);
>  	if (rtex->htile_buffer == NULL) {
>  		/* this is not a fatal error as we can still keep rendering
>  		 * without htile buffer */
>  		R600_ERR("Failed to create buffer object for htile buffer.\n");
>  	} else {
>  		r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
>  					 0, rtex->surface.htile_size,
>  					 clear_value);
> @@ -2099,21 +2103,22 @@ static void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
>  	r600_texture_discard_cmask(rctx->screen, tex);
>
>  	/* Get a DCC buffer. */
>  	if (tex->last_dcc_separate_buffer) {
>  		assert(tex->dcc_gather_statistics);
>  		assert(!tex->dcc_separate_buffer);
>  		tex->dcc_separate_buffer = tex->last_dcc_separate_buffer;
>  		tex->last_dcc_separate_buffer = NULL;
>  	} else {
>  		tex->dcc_separate_buffer = (struct r600_resource*)
> -			r600_aligned_buffer_create(rctx->b.screen, 0,
> +			r600_aligned_buffer_create(rctx->b.screen,
> +						   R600_RESOURCE_FLAG_UNMAPPABLE,
>  						   PIPE_USAGE_DEFAULT,
>  						   tex->surface.dcc_size,
>  						   tex->surface.dcc_alignment);
>  		if (!tex->dcc_separate_buffer)
>  			return;
>  	}
>
>  	/* dcc_offset is the absolute GPUVM address. */
>  	tex->dcc_offset = tex->dcc_separate_buffer->gpu_address;
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index 88d72c1..f4efb0d 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -282,22 +282,24 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
>  	uint64_t scratch_bo_size, scratch_needed;
>  	scratch_bo_size = 0;
>  	scratch_needed = config->scratch_bytes_per_wave * sctx->scratch_waves;
>  	if (sctx->compute_scratch_buffer)
>  		scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;
>
>  	if (scratch_bo_size < scratch_needed) {
>  		r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
>
>  		sctx->compute_scratch_buffer = (struct r600_resource*)
> -			pipe_buffer_create(&sctx->screen->b.b, 0,
> -					   PIPE_USAGE_DEFAULT, scratch_needed);
> +			r600_aligned_buffer_create(&sctx->screen->b.b,
> +						   R600_RESOURCE_FLAG_UNMAPPABLE,
> +						   PIPE_USAGE_DEFAULT,
> +						   scratch_needed, 256);
>
>  		if (!sctx->compute_scratch_buffer)
>  			return false;
>  	}
>
>  	if (sctx->compute_scratch_buffer != shader->scratch_bo && scratch_needed) {
>  		uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
>
>  		si_shader_apply_scratch_relocs(sctx, shader, config, scratch_va);
>
> diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
> index ea999d9..1be7586 100644
> --- a/src/gallium/drivers/radeonsi/si_cp_dma.c
> +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
> @@ -269,22 +269,24 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size,
>
>  	assert(size < SI_CPDMA_ALIGNMENT);
>
>  	/* Use the scratch buffer as the dummy buffer. The 3D engine should be
>  	 * idle at this point.
>  	 */
>  	if (!sctx->scratch_buffer ||
>  	    sctx->scratch_buffer->b.b.width0 < scratch_size) {
>  		r600_resource_reference(&sctx->scratch_buffer, NULL);
>  		sctx->scratch_buffer = (struct r600_resource*)
> -			pipe_buffer_create(&sctx->screen->b.b, 0,
> -					   PIPE_USAGE_DEFAULT, scratch_size);
> +			r600_aligned_buffer_create(&sctx->screen->b.b,
> +						   R600_RESOURCE_FLAG_UNMAPPABLE,
> +						   PIPE_USAGE_DEFAULT,
> +						   scratch_size, 256);
>  		if (!sctx->scratch_buffer)
>  			return;
>
>  		si_mark_atom_dirty(sctx, &sctx->scratch_state);
>  	}
>
>  	si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b,
>  			  &sctx->scratch_buffer->b.b, size, size, user_flags,
>  			  is_first, &dma_flags);
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index a947bad..843c6b3 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -197,22 +197,23 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>
>  		if (ws->cs_add_const_preamble_ib) {
>  			sctx->ce_preamble_ib =
>  			           ws->cs_add_const_preamble_ib(sctx->b.gfx.cs);
>
>  			if (!sctx->ce_preamble_ib)
>  				goto fail;
>  		}
>
>  		sctx->ce_suballocator =
> -				u_suballocator_create(&sctx->b.b, 1024 * 1024,
> -						      0, PIPE_USAGE_DEFAULT, 0, false);
> +			u_suballocator_create(&sctx->b.b, 1024 * 1024, 0,
> +					      PIPE_USAGE_DEFAULT,
> +					      R600_RESOURCE_FLAG_UNMAPPABLE, false);
>  		if (!sctx->ce_suballocator)
>  			goto fail;
>  	}
>
>  	sctx->b.gfx.flush = si_context_gfx_flush;
>
>  	/* Border colors. */
>  	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
>  					  sizeof(*sctx->border_color_table));
>  	if (!sctx->border_color_table)
> @@ -250,22 +251,25 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>  	sctx->blitter->draw_rectangle = r600_draw_rectangle;
>
>  	sctx->sample_mask.sample_mask = 0xffff;
>
>  	/* these must be last */
>  	si_begin_new_cs(sctx);
>
>  	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
>  	 * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
>  	if (sctx->b.chip_class == CIK) {
> -		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
> -								 PIPE_USAGE_DEFAULT, 16);
> +		sctx->null_const_buf.buffer =
> +			r600_aligned_buffer_create(screen,
> +						   R600_RESOURCE_FLAG_UNMAPPABLE,
> +						   PIPE_USAGE_DEFAULT, 16,
> +						   sctx->screen->b.info.tcc_cache_line_size);
>  		if (!sctx->null_const_buf.buffer)
>  			goto fail;
>  		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;
>
>  		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
>  			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
>  				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
>  							      &sctx->null_const_buf);
>  			}
>  		}
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 179176c..4a81b56 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -2072,32 +2072,36 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
>  			    sctx->esgs_ring->width0 < esgs_ring_size);
>  	bool update_gsvs = gsvs_ring_size &&
>  			   (!sctx->gsvs_ring ||
>  			    sctx->gsvs_ring->width0 < gsvs_ring_size);
>
>  	if (!update_esgs && !update_gsvs)
>  		return true;
>
>  	if (update_esgs) {
>  		pipe_resource_reference(&sctx->esgs_ring, NULL);
> -		sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, 0,
> -						     PIPE_USAGE_DEFAULT,
> -						     esgs_ring_size);
> +		sctx->esgs_ring =
> +			r600_aligned_buffer_create(sctx->b.b.screen,
> +						   R600_RESOURCE_FLAG_UNMAPPABLE,
> +						   PIPE_USAGE_DEFAULT,
> +						   esgs_ring_size, alignment);
>  		if (!sctx->esgs_ring)
>  			return false;
>  	}
>
>  	if (update_gsvs) {
>  		pipe_resource_reference(&sctx->gsvs_ring, NULL);
> -		sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, 0,
> -						     PIPE_USAGE_DEFAULT,
> -						     gsvs_ring_size);
> +		sctx->gsvs_ring =
> +			r600_aligned_buffer_create(sctx->b.b.screen,
> +						   R600_RESOURCE_FLAG_UNMAPPABLE,
> +						   PIPE_USAGE_DEFAULT,
> +						   gsvs_ring_size, alignment);
>  		if (!sctx->gsvs_ring)
>  			return false;
>  	}
>
>  	/* Create the "init_config_gs_rings" state. */
>  	pm4 = CALLOC_STRUCT(si_pm4_state);
>  	if (!pm4)
>  		return false;
>
>  	if (sctx->b.chip_class >= CIK) {
> @@ -2220,22 +2224,24 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
>  		sctx->scratch_waves;
>  	unsigned spi_tmpring_size;
>  	int r;
>
>  	if (scratch_needed_size > 0) {
>  		if (scratch_needed_size > current_scratch_buffer_size) {
>  			/* Create a bigger scratch buffer */
>  			r600_resource_reference(&sctx->scratch_buffer, NULL);
>
>  			sctx->scratch_buffer = (struct r600_resource*)
> -					pipe_buffer_create(&sctx->screen->b.b, 0,
> -	                                PIPE_USAGE_DEFAULT, scratch_needed_size);
> +				r600_aligned_buffer_create(&sctx->screen->b.b,
> +							   R600_RESOURCE_FLAG_UNMAPPABLE,
> +							   PIPE_USAGE_DEFAULT,
> +							   scratch_needed_size, 256);
>  			if (!sctx->scratch_buffer)
>  				return false;
>
>  			si_mark_atom_dirty(sctx, &sctx->scratch_state);
>  			r600_context_add_resource_size(&sctx->b.b,
>  						       &sctx->scratch_buffer->b.b);
>  		}
>
>  		/* Update the shaders, so they are using the latest scratch.  The
>  		 * scratch buffer may have been changed since these shaders were
> @@ -2325,32 +2331,37 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
>  	case CIK:
>  		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
>  		break;
>  	case VI:
>  	default:
>  		max_offchip_buffers = MIN2(max_offchip_buffers, 512);
>  		break;
>  	}
>
>  	assert(!sctx->tf_ring);
> -	sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, 0,
> -					   PIPE_USAGE_DEFAULT,
> -					   32768 * sctx->screen->b.info.max_se);
> +	sctx->tf_ring = r600_aligned_buffer_create(sctx->b.b.screen,
> +						   R600_RESOURCE_FLAG_UNMAPPABLE,
> +						   PIPE_USAGE_DEFAULT,
> +						   32768 * sctx->screen->b.info.max_se,
> +						   256);
>  	if (!sctx->tf_ring)
>  		return;
>
>  	assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
>
> -	sctx->tess_offchip_ring = pipe_buffer_create(sctx->b.b.screen, 0,
> -	                                             PIPE_USAGE_DEFAULT,
> -	                                             max_offchip_buffers *
> -	                                             sctx->screen->tess_offchip_block_dw_size * 4);
> +	sctx->tess_offchip_ring =
> +		r600_aligned_buffer_create(sctx->b.b.screen,
> +					   R600_RESOURCE_FLAG_UNMAPPABLE,
> +					   PIPE_USAGE_DEFAULT,
> +					   max_offchip_buffers *
> +					   sctx->screen->tess_offchip_block_dw_size * 4,
> +					   256);
>  	if (!sctx->tess_offchip_ring)
>  		return;
>
>  	si_init_config_add_vgt_flush(sctx);
>
>  	/* Append these registers to the init config state. */
>  	if (sctx->b.chip_class >= CIK) {
>  		if (sctx->b.chip_class >= VI)
>  			--max_offchip_buffers;
>
>
/