[Mesa-dev] [PATCH 18/18] radeonsi: use R600_RESOURCE_FLAG_UNMAPPABLE where it's desirable
Nicolai Hähnle
nhaehnle at gmail.com
Thu Feb 16 15:23:52 UTC 2017
Some cool improvements all around. I have some questions on patches 9, 12,
and 15; the rest are
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
On 16.02.2017 13:53, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/gallium/drivers/radeon/r600_texture.c | 11 +++++--
> src/gallium/drivers/radeonsi/si_compute.c | 6 ++--
> src/gallium/drivers/radeonsi/si_cp_dma.c | 6 ++--
> src/gallium/drivers/radeonsi/si_pipe.c | 12 +++++---
> src/gallium/drivers/radeonsi/si_state_shaders.c | 41 ++++++++++++++++---------
> 5 files changed, 50 insertions(+), 26 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 47aa8b1..0865d35 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -756,21 +756,23 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
>
> assert(rtex->cmask.size == 0);
>
> if (rscreen->chip_class >= SI) {
> si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
> } else {
> r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
> }
>
> rtex->cmask_buffer = (struct r600_resource *)
> - r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
> + r600_aligned_buffer_create(&rscreen->b,
> + R600_RESOURCE_FLAG_UNMAPPABLE,
> + PIPE_USAGE_DEFAULT,
> rtex->cmask.size,
> rtex->cmask.alignment);
> if (rtex->cmask_buffer == NULL) {
> rtex->cmask.size = 0;
> return;
> }
>
> /* update colorbuffer state bits */
> rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
>
> @@ -867,21 +869,23 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
> clear_value = 0x0000030F;
> } else {
> r600_texture_get_htile_size(rscreen, rtex);
> clear_value = 0;
> }
>
> if (!rtex->surface.htile_size)
> return;
>
> rtex->htile_buffer = (struct r600_resource*)
> - r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
> + r600_aligned_buffer_create(&rscreen->b,
> + R600_RESOURCE_FLAG_UNMAPPABLE,
> + PIPE_USAGE_DEFAULT,
> rtex->surface.htile_size,
> rtex->surface.htile_alignment);
> if (rtex->htile_buffer == NULL) {
> /* this is not a fatal error as we can still keep rendering
> * without htile buffer */
> R600_ERR("Failed to create buffer object for htile buffer.\n");
> } else {
> r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
> 0, rtex->surface.htile_size,
> clear_value);
> @@ -2099,21 +2103,22 @@ static void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
> r600_texture_discard_cmask(rctx->screen, tex);
>
> /* Get a DCC buffer. */
> if (tex->last_dcc_separate_buffer) {
> assert(tex->dcc_gather_statistics);
> assert(!tex->dcc_separate_buffer);
> tex->dcc_separate_buffer = tex->last_dcc_separate_buffer;
> tex->last_dcc_separate_buffer = NULL;
> } else {
> tex->dcc_separate_buffer = (struct r600_resource*)
> - r600_aligned_buffer_create(rctx->b.screen, 0,
> + r600_aligned_buffer_create(rctx->b.screen,
> + R600_RESOURCE_FLAG_UNMAPPABLE,
> PIPE_USAGE_DEFAULT,
> tex->surface.dcc_size,
> tex->surface.dcc_alignment);
> if (!tex->dcc_separate_buffer)
> return;
> }
>
> /* dcc_offset is the absolute GPUVM address. */
> tex->dcc_offset = tex->dcc_separate_buffer->gpu_address;
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index 88d72c1..f4efb0d 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -282,22 +282,24 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
> uint64_t scratch_bo_size, scratch_needed;
> scratch_bo_size = 0;
> scratch_needed = config->scratch_bytes_per_wave * sctx->scratch_waves;
> if (sctx->compute_scratch_buffer)
> scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;
>
> if (scratch_bo_size < scratch_needed) {
> r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
>
> sctx->compute_scratch_buffer = (struct r600_resource*)
> - pipe_buffer_create(&sctx->screen->b.b, 0,
> - PIPE_USAGE_DEFAULT, scratch_needed);
> + r600_aligned_buffer_create(&sctx->screen->b.b,
> + R600_RESOURCE_FLAG_UNMAPPABLE,
> + PIPE_USAGE_DEFAULT,
> + scratch_needed, 256);
>
> if (!sctx->compute_scratch_buffer)
> return false;
> }
>
> if (sctx->compute_scratch_buffer != shader->scratch_bo && scratch_needed) {
> uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
>
> si_shader_apply_scratch_relocs(sctx, shader, config, scratch_va);
>
> diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
> index ea999d9..1be7586 100644
> --- a/src/gallium/drivers/radeonsi/si_cp_dma.c
> +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
> @@ -269,22 +269,24 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size,
>
> assert(size < SI_CPDMA_ALIGNMENT);
>
> /* Use the scratch buffer as the dummy buffer. The 3D engine should be
> * idle at this point.
> */
> if (!sctx->scratch_buffer ||
> sctx->scratch_buffer->b.b.width0 < scratch_size) {
> r600_resource_reference(&sctx->scratch_buffer, NULL);
> sctx->scratch_buffer = (struct r600_resource*)
> - pipe_buffer_create(&sctx->screen->b.b, 0,
> - PIPE_USAGE_DEFAULT, scratch_size);
> + r600_aligned_buffer_create(&sctx->screen->b.b,
> + R600_RESOURCE_FLAG_UNMAPPABLE,
> + PIPE_USAGE_DEFAULT,
> + scratch_size, 256);
> if (!sctx->scratch_buffer)
> return;
>
> si_mark_atom_dirty(sctx, &sctx->scratch_state);
> }
>
> si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b,
> &sctx->scratch_buffer->b.b, size, size, user_flags,
> is_first, &dma_flags);
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index a947bad..843c6b3 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -197,22 +197,23 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>
> if (ws->cs_add_const_preamble_ib) {
> sctx->ce_preamble_ib =
> ws->cs_add_const_preamble_ib(sctx->b.gfx.cs);
>
> if (!sctx->ce_preamble_ib)
> goto fail;
> }
>
> sctx->ce_suballocator =
> - u_suballocator_create(&sctx->b.b, 1024 * 1024,
> - 0, PIPE_USAGE_DEFAULT, 0, false);
> + u_suballocator_create(&sctx->b.b, 1024 * 1024, 0,
> + PIPE_USAGE_DEFAULT,
> + R600_RESOURCE_FLAG_UNMAPPABLE, false);
> if (!sctx->ce_suballocator)
> goto fail;
> }
>
> sctx->b.gfx.flush = si_context_gfx_flush;
>
> /* Border colors. */
> sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
> sizeof(*sctx->border_color_table));
> if (!sctx->border_color_table)
> @@ -250,22 +251,25 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
> sctx->blitter->draw_rectangle = r600_draw_rectangle;
>
> sctx->sample_mask.sample_mask = 0xffff;
>
> /* these must be last */
> si_begin_new_cs(sctx);
>
> /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
> * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
> if (sctx->b.chip_class == CIK) {
> - sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
> - PIPE_USAGE_DEFAULT, 16);
> + sctx->null_const_buf.buffer =
> + r600_aligned_buffer_create(screen,
> + R600_RESOURCE_FLAG_UNMAPPABLE,
> + PIPE_USAGE_DEFAULT, 16,
> + sctx->screen->b.info.tcc_cache_line_size);
> if (!sctx->null_const_buf.buffer)
> goto fail;
> sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;
>
> for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
> sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
> &sctx->null_const_buf);
> }
> }
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 179176c..4a81b56 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -2072,32 +2072,36 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
> sctx->esgs_ring->width0 < esgs_ring_size);
> bool update_gsvs = gsvs_ring_size &&
> (!sctx->gsvs_ring ||
> sctx->gsvs_ring->width0 < gsvs_ring_size);
>
> if (!update_esgs && !update_gsvs)
> return true;
>
> if (update_esgs) {
> pipe_resource_reference(&sctx->esgs_ring, NULL);
> - sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, 0,
> - PIPE_USAGE_DEFAULT,
> - esgs_ring_size);
> + sctx->esgs_ring =
> + r600_aligned_buffer_create(sctx->b.b.screen,
> + R600_RESOURCE_FLAG_UNMAPPABLE,
> + PIPE_USAGE_DEFAULT,
> + esgs_ring_size, alignment);
> if (!sctx->esgs_ring)
> return false;
> }
>
> if (update_gsvs) {
> pipe_resource_reference(&sctx->gsvs_ring, NULL);
> - sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, 0,
> - PIPE_USAGE_DEFAULT,
> - gsvs_ring_size);
> + sctx->gsvs_ring =
> + r600_aligned_buffer_create(sctx->b.b.screen,
> + R600_RESOURCE_FLAG_UNMAPPABLE,
> + PIPE_USAGE_DEFAULT,
> + gsvs_ring_size, alignment);
> if (!sctx->gsvs_ring)
> return false;
> }
>
> /* Create the "init_config_gs_rings" state. */
> pm4 = CALLOC_STRUCT(si_pm4_state);
> if (!pm4)
> return false;
>
> if (sctx->b.chip_class >= CIK) {
> @@ -2220,22 +2224,24 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
> sctx->scratch_waves;
> unsigned spi_tmpring_size;
> int r;
>
> if (scratch_needed_size > 0) {
> if (scratch_needed_size > current_scratch_buffer_size) {
> /* Create a bigger scratch buffer */
> r600_resource_reference(&sctx->scratch_buffer, NULL);
>
> sctx->scratch_buffer = (struct r600_resource*)
> - pipe_buffer_create(&sctx->screen->b.b, 0,
> - PIPE_USAGE_DEFAULT, scratch_needed_size);
> + r600_aligned_buffer_create(&sctx->screen->b.b,
> + R600_RESOURCE_FLAG_UNMAPPABLE,
> + PIPE_USAGE_DEFAULT,
> + scratch_needed_size, 256);
> if (!sctx->scratch_buffer)
> return false;
>
> si_mark_atom_dirty(sctx, &sctx->scratch_state);
> r600_context_add_resource_size(&sctx->b.b,
> &sctx->scratch_buffer->b.b);
> }
>
> /* Update the shaders, so they are using the latest scratch. The
> * scratch buffer may have been changed since these shaders were
> @@ -2325,32 +2331,37 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
> case CIK:
> max_offchip_buffers = MIN2(max_offchip_buffers, 508);
> break;
> case VI:
> default:
> max_offchip_buffers = MIN2(max_offchip_buffers, 512);
> break;
> }
>
> assert(!sctx->tf_ring);
> - sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, 0,
> - PIPE_USAGE_DEFAULT,
> - 32768 * sctx->screen->b.info.max_se);
> + sctx->tf_ring = r600_aligned_buffer_create(sctx->b.b.screen,
> + R600_RESOURCE_FLAG_UNMAPPABLE,
> + PIPE_USAGE_DEFAULT,
> + 32768 * sctx->screen->b.info.max_se,
> + 256);
> if (!sctx->tf_ring)
> return;
>
> assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
>
> - sctx->tess_offchip_ring = pipe_buffer_create(sctx->b.b.screen, 0,
> - PIPE_USAGE_DEFAULT,
> - max_offchip_buffers *
> - sctx->screen->tess_offchip_block_dw_size * 4);
> + sctx->tess_offchip_ring =
> + r600_aligned_buffer_create(sctx->b.b.screen,
> + R600_RESOURCE_FLAG_UNMAPPABLE,
> + PIPE_USAGE_DEFAULT,
> + max_offchip_buffers *
> + sctx->screen->tess_offchip_block_dw_size * 4,
> + 256);
> if (!sctx->tess_offchip_ring)
> return;
>
> si_init_config_add_vgt_flush(sctx);
>
> /* Append these registers to the init config state. */
> if (sctx->b.chip_class >= CIK) {
> if (sctx->b.chip_class >= VI)
> --max_offchip_buffers;
>
>
More information about the mesa-dev mailing list