[Mesa-dev] [PATCH 8/8] gallium/radeon: derive buffer placement and flags only once per buffer
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Fri Aug 26 10:42:06 UTC 2016
I would prefer it if the function could be split into two functions
instead of using the initialized flag. I think we already know at each
call site whether it is an initialization or a reinitialization.
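
Roughly what I have in mind (names are just placeholders, completely
untested; the bodies would be the code this patch already moves around):

/* Derive placement and flags once, at resource creation. Body would be
 * the usage switch, the persistent-mapping and tiling checks, DBG_NO_WC
 * and the vram_usage/gart_usage bookkeeping that the patch currently
 * wraps in "if (!res->initialized)". */
static void r600_init_resource_fields(struct r600_common_screen *rscreen,
                                      struct r600_resource *res,
                                      uint64_t size)
{
        /* ... */
}

/* (Re)allocate the buffer from the already-derived fields. */
static bool r600_alloc_resource(struct r600_common_screen *rscreen,
                                struct r600_resource *res,
                                uint64_t size, unsigned alignment)
{
        struct pb_buffer *new_buf =
                rscreen->ws->buffer_create(rscreen->ws, size, alignment,
                                           res->domains, res->flags);
        if (!new_buf)
                return false;

        /* ... same pointer replacement, gpu_address query and
         * valid-range/TC_L2 reset as r600_init_resource does today ... */
        return true;
}

Buffer/texture creation would call both, while the invalidation path
would only call r600_alloc_resource(), so the initialized flag (and the
extra field in r600_resource) goes away.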
- Bas
On Thu, Aug 18, 2016 at 9:46 PM, Marek Olšák <maraeo at gmail.com> wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> Invalidated buffers don't have to do this.
> ---
> src/gallium/drivers/radeon/r600_buffer_common.c | 147 +++++++++++++-----------
> src/gallium/drivers/radeon/r600_pipe_common.h | 2 +
> 2 files changed, 80 insertions(+), 69 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
> index 4480293..113a7dc 100644
> --- a/src/gallium/drivers/radeon/r600_buffer_common.c
> +++ b/src/gallium/drivers/radeon/r600_buffer_common.c
> @@ -98,91 +98,108 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
> /* Setting the CS to NULL will prevent doing checks we have done already. */
> return ctx->ws->buffer_map(resource->buf, NULL, usage);
> }
>
> bool r600_init_resource(struct r600_common_screen *rscreen,
> struct r600_resource *res,
> uint64_t size, unsigned alignment)
> {
> struct r600_texture *rtex = (struct r600_texture*)res;
> struct pb_buffer *old_buf, *new_buf;
> - enum radeon_bo_flag flags = 0;
> -
> - switch (res->b.b.usage) {
> - case PIPE_USAGE_STREAM:
> - flags = RADEON_FLAG_GTT_WC;
> - /* fall through */
> - case PIPE_USAGE_STAGING:
> - /* Transfers are likely to occur more often with these resources. */
> - res->domains = RADEON_DOMAIN_GTT;
> - break;
> - case PIPE_USAGE_DYNAMIC:
> - /* Older kernels didn't always flush the HDP cache before
> - * CS execution
> - */
> - if (rscreen->info.drm_major == 2 &&
> - rscreen->info.drm_minor < 40) {
> +
> + if (!res->initialized) {
> + res->flags = 0;
> +
> + switch (res->b.b.usage) {
> + case PIPE_USAGE_STREAM:
> + res->flags = RADEON_FLAG_GTT_WC;
> + /* fall through */
> + case PIPE_USAGE_STAGING:
> + /* Transfers are likely to occur more often with these
> + * resources. */
> res->domains = RADEON_DOMAIN_GTT;
> - flags |= RADEON_FLAG_GTT_WC;
> + break;
> + case PIPE_USAGE_DYNAMIC:
> + /* Older kernels didn't always flush the HDP cache before
> + * CS execution
> + */
> + if (rscreen->info.drm_major == 2 &&
> + rscreen->info.drm_minor < 40) {
> + res->domains = RADEON_DOMAIN_GTT;
> + res->flags |= RADEON_FLAG_GTT_WC;
> + break;
> + }
> + res->flags |= RADEON_FLAG_CPU_ACCESS;
> + /* fall through */
> + case PIPE_USAGE_DEFAULT:
> + case PIPE_USAGE_IMMUTABLE:
> + default:
> + /* Not listing GTT here improves performance in some
> + * apps. */
> + res->domains = RADEON_DOMAIN_VRAM;
> + res->flags |= RADEON_FLAG_GTT_WC;
> break;
> }
> - flags |= RADEON_FLAG_CPU_ACCESS;
> - /* fall through */
> - case PIPE_USAGE_DEFAULT:
> - case PIPE_USAGE_IMMUTABLE:
> - default:
> - /* Not listing GTT here improves performance in some apps. */
> - res->domains = RADEON_DOMAIN_VRAM;
> - flags |= RADEON_FLAG_GTT_WC;
> - break;
> - }
>
> - if (res->b.b.target == PIPE_BUFFER &&
> - res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
> - PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
> - /* Use GTT for all persistent mappings with older kernels,
> - * because they didn't always flush the HDP cache before CS
> - * execution.
> - *
> - * Write-combined CPU mappings are fine, the kernel ensures all CPU
> - * writes finish before the GPU executes a command stream.
> + if (res->b.b.target == PIPE_BUFFER &&
> + res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
> + PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
> + /* Use GTT for all persistent mappings with older
> + * kernels, because they didn't always flush the HDP
> + * cache before CS execution.
> + *
> + * Write-combined CPU mappings are fine, the kernel
> + * ensures all CPU writes finish before the GPU
> + * executes a command stream.
> + */
> + if (rscreen->info.drm_major == 2 &&
> + rscreen->info.drm_minor < 40)
> + res->domains = RADEON_DOMAIN_GTT;
> + else if (res->domains & RADEON_DOMAIN_VRAM)
> + res->flags |= RADEON_FLAG_CPU_ACCESS;
> + }
> +
> + /* Tiled textures are unmappable. Always put them in VRAM. */
> + if (res->b.b.target != PIPE_BUFFER &&
> + rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
> + res->domains = RADEON_DOMAIN_VRAM;
> + res->flags &= ~RADEON_FLAG_CPU_ACCESS;
> + res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
> + RADEON_FLAG_GTT_WC;
> + }
> +
> + /* If VRAM is just stolen system memory, allow both VRAM and
> + * GTT, whichever has free space. If a buffer is evicted from
> + * VRAM to GTT, it will stay there.
> */
> - if (rscreen->info.drm_major == 2 &&
> - rscreen->info.drm_minor < 40)
> - res->domains = RADEON_DOMAIN_GTT;
> - else if (res->domains & RADEON_DOMAIN_VRAM)
> - flags |= RADEON_FLAG_CPU_ACCESS;
> - }
> + if (!rscreen->info.has_dedicated_vram &&
> + res->domains == RADEON_DOMAIN_VRAM)
> + res->domains = RADEON_DOMAIN_VRAM_GTT;
>
> - /* Tiled textures are unmappable. Always put them in VRAM. */
> - if (res->b.b.target != PIPE_BUFFER &&
> - rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
> - res->domains = RADEON_DOMAIN_VRAM;
> - flags &= ~RADEON_FLAG_CPU_ACCESS;
> - flags |= RADEON_FLAG_NO_CPU_ACCESS |
> - RADEON_FLAG_GTT_WC;
> - }
> + if (rscreen->debug_flags & DBG_NO_WC)
> + res->flags &= ~RADEON_FLAG_GTT_WC;
>
> - /* If VRAM is just stolen system memory, allow both VRAM and GTT,
> - * whichever has free space. If a buffer is evicted from VRAM to GTT,
> - * it will stay there.
> - */
> - if (!rscreen->info.has_dedicated_vram &&
> - res->domains == RADEON_DOMAIN_VRAM)
> - res->domains = RADEON_DOMAIN_VRAM_GTT;
> + /* Set expected VRAM and GART usage for the buffer. */
> + res->vram_usage = 0;
> + res->gart_usage = 0;
>
> - if (rscreen->debug_flags & DBG_NO_WC)
> - flags &= ~RADEON_FLAG_GTT_WC;
> + if (res->domains & RADEON_DOMAIN_VRAM)
> + res->vram_usage = size;
> + else if (res->domains & RADEON_DOMAIN_GTT)
> + res->gart_usage = size;
> +
> + res->initialized = true;
> + }
>
> /* Allocate a new resource. */
> new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
> - res->domains, flags);
> + res->domains, res->flags);
> if (!new_buf) {
> return false;
> }
>
> /* Replace the pointer such that if res->buf wasn't NULL, it won't be
> * NULL. This should prevent crashes with multiple contexts using
> * the same buffer where one of the contexts invalidates it while
> * the others are using it. */
> old_buf = res->buf;
> res->buf = new_buf; /* should be atomic */
> @@ -190,29 +207,20 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
> if (rscreen->info.has_virtual_memory)
> res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf);
> else
> res->gpu_address = 0;
>
> pb_reference(&old_buf, NULL);
>
> util_range_set_empty(&res->valid_buffer_range);
> res->TC_L2_dirty = false;
>
> - /* Set expected VRAM and GART usage for the buffer. */
> - res->vram_usage = 0;
> - res->gart_usage = 0;
> -
> - if (res->domains & RADEON_DOMAIN_VRAM)
> - res->vram_usage = size;
> - else if (res->domains & RADEON_DOMAIN_GTT)
> - res->gart_usage = size;
> -
> /* Print debug information. */
> if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
> fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
> res->gpu_address, res->gpu_address + res->buf->size,
> res->buf->size);
> }
> return true;
> }
>
> static void r600_buffer_destroy(struct pipe_screen *screen,
> @@ -496,20 +504,21 @@ r600_alloc_buffer_struct(struct pipe_screen *screen,
> {
> struct r600_resource *rbuffer;
>
> rbuffer = MALLOC_STRUCT(r600_resource);
>
> rbuffer->b.b = *templ;
> pipe_reference_init(&rbuffer->b.b.reference, 1);
> rbuffer->b.b.screen = screen;
> rbuffer->b.vtbl = &r600_buffer_vtbl;
> rbuffer->buf = NULL;
> + rbuffer->initialized = false;
> rbuffer->TC_L2_dirty = false;
> rbuffer->is_shared = false;
> util_range_init(&rbuffer->valid_buffer_range);
> return rbuffer;
> }
>
> struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
> const struct pipe_resource *templ,
> unsigned alignment)
> {
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 5375044..358d5f4 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -169,20 +169,22 @@ struct r600_resource {
>
> /* Winsys objects. */
> struct pb_buffer *buf;
> uint64_t gpu_address;
> /* Memory usage if the buffer placement is optimal. */
> uint64_t vram_usage;
> uint64_t gart_usage;
>
> /* Resource state. */
> enum radeon_bo_domain domains;
> + enum radeon_bo_flag flags;
> + bool initialized;
>
> /* The buffer range which is initialized (with a write transfer,
> * streamout, DMA, or as a random access target). The rest of
> * the buffer is considered invalid and can be mapped unsynchronized.
> *
> * This allows unsynchronized mapping of a buffer range which hasn't
> * been used yet. It's for applications which forget to use
> * the unsynchronized map flag and expect the driver to figure it out.
> */
> struct util_range valid_buffer_range;
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev