[Mesa-dev] [PATCH] winsys/amdgpu: add a fast exit path into amdgpu_cs_add_buffer

Nicolai Hähnle nhaehnle at gmail.com
Fri Jan 27 09:01:50 UTC 2017


On 27.01.2017 00:51, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> The time spent in amdgpu_cs_add_buffer dropped by 37% for torcs.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

> ---
>  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 16 ++++++++++++++++
>  src/gallium/winsys/amdgpu/drm/amdgpu_cs.h |  5 +++++
>  2 files changed, 21 insertions(+)
>
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> index 0bc4ce9..2a1b932 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> @@ -437,40 +437,54 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
>  {
>     /* Don't use the "domains" parameter. Amdgpu doesn't support changing
>      * the buffer placement during command submission.
>      */
>     struct amdgpu_cs *acs = amdgpu_cs(rcs);
>     struct amdgpu_cs_context *cs = acs->csc;
>     struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
>     struct amdgpu_cs_buffer *buffer;
>     int index;
>
> +   /* Fast exit for no-op calls.
> +    * This is very effective with suballocators and linear uploaders that
> +    * are outside of the winsys.
> +    */
> +   if (bo == cs->last_added_bo &&
> +       (usage & cs->last_added_bo_usage) == usage &&
> +       (1ull << priority) & cs->last_added_bo_priority_usage)
> +      return cs->last_added_bo_index;
> +
>     if (!bo->bo) {
>        index = amdgpu_lookup_or_add_slab_buffer(acs, bo);
>        if (index < 0)
>           return 0;
>
>        buffer = &cs->slab_buffers[index];
>        buffer->usage |= usage;
>
>        usage &= ~RADEON_USAGE_SYNCHRONIZED;
>        index = buffer->u.slab.real_idx;
>     } else {
>        index = amdgpu_lookup_or_add_real_buffer(acs, bo);
>        if (index < 0)
>           return 0;
>     }
>
>     buffer = &cs->real_buffers[index];
>     buffer->u.real.priority_usage |= 1llu << priority;
>     buffer->usage |= usage;
>     cs->flags[index] = MAX2(cs->flags[index], priority / 4);
> +
> +   cs->last_added_bo = bo;
> +   cs->last_added_bo_index = index;
> +   cs->last_added_bo_usage = buffer->usage;
> +   cs->last_added_bo_priority_usage = buffer->u.real.priority_usage;
>     return index;
>  }
>
>  static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
>  {
>     struct pb_buffer *pb;
>     uint8_t *mapped;
>     unsigned buffer_size;
>
>     /* Always create a buffer that is at least as large as the maximum seen IB
> @@ -638,20 +652,21 @@ static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs,
>
>     default:
>     case RING_GFX:
>        cs->request.ip_type = AMDGPU_HW_IP_GFX;
>        break;
>     }
>
>     for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) {
>        cs->buffer_indices_hashlist[i] = -1;
>     }
> +   cs->last_added_bo = NULL;
>
>     cs->request.number_of_ibs = 1;
>     cs->request.ibs = &cs->ib[IB_MAIN];
>
>     cs->ib[IB_CONST].flags = AMDGPU_IB_FLAG_CE;
>     cs->ib[IB_CONST_PREAMBLE].flags = AMDGPU_IB_FLAG_CE |
>                                       AMDGPU_IB_FLAG_PREAMBLE;
>
>     return true;
>  }
> @@ -669,20 +684,21 @@ static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
>        amdgpu_winsys_bo_reference(&cs->slab_buffers[i].bo, NULL);
>     }
>
>     cs->num_real_buffers = 0;
>     cs->num_slab_buffers = 0;
>     amdgpu_fence_reference(&cs->fence, NULL);
>
>     for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) {
>        cs->buffer_indices_hashlist[i] = -1;
>     }
> +   cs->last_added_bo = NULL;
>  }
>
>  static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs)
>  {
>     amdgpu_cs_context_cleanup(cs);
>     FREE(cs->flags);
>     FREE(cs->real_buffers);
>     FREE(cs->handles);
>     FREE(cs->slab_buffers);
>     FREE(cs->request.dependencies);
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
> index 90b9e83..495d55b 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
> @@ -87,20 +87,25 @@ struct amdgpu_cs_context {
>     amdgpu_bo_handle            *handles;
>     uint8_t                     *flags;
>     struct amdgpu_cs_buffer     *real_buffers;
>
>     unsigned                    num_slab_buffers;
>     unsigned                    max_slab_buffers;
>     struct amdgpu_cs_buffer     *slab_buffers;
>
>     int                         buffer_indices_hashlist[4096];
>
> +   struct amdgpu_winsys_bo     *last_added_bo;
> +   unsigned                    last_added_bo_index;
> +   unsigned                    last_added_bo_usage;
> +   uint64_t                    last_added_bo_priority_usage;
> +
>     unsigned                    max_dependencies;
>
>     struct pipe_fence_handle    *fence;
>
>     /* the error returned from cs_flush for non-async submissions */
>     int                         error_code;
>  };
>
>  struct amdgpu_cs {
>     struct amdgpu_ib main; /* must be first because this is inherited */
>


More information about the mesa-dev mailing list