[Mesa-dev] [PATCH] winsys/radeon: consolidate hash table lookup
Christian König
deathsimple at vodafone.de
Sat Apr 12 01:35:48 PDT 2014
Am 11.04.2014 19:03, schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
BTW:
I've always wondered if the custom hash table is the best approach here.
Having a BO active in more than one command submission context at the
same time sounds rather unlikely to me.
Something like storing a pointer to the last used CS and it's index in
the BO and only when a BO is really used in more than one CS context at
the same time fall back to the hashtable and lockup the index from there
sounds like less overhead.
>
> I should have done this long ago.
> ---
> src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 110 +++++++++++---------------
> src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 7 +-
> 2 files changed, 49 insertions(+), 68 deletions(-)
>
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> index db9fbfa..b55eb80 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> @@ -188,41 +188,40 @@ static INLINE void update_reloc(struct drm_radeon_cs_reloc *reloc,
> reloc->flags = MAX2(reloc->flags, priority);
> }
>
> -int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
> +int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo,
> + struct drm_radeon_cs_reloc **out_reloc)
> {
> - struct drm_radeon_cs_reloc *reloc;
> - unsigned i;
> + struct drm_radeon_cs_reloc *reloc = NULL;
> unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
> + int i = -1;
>
> if (csc->is_handle_added[hash]) {
> i = csc->reloc_indices_hashlist[hash];
> reloc = &csc->relocs[i];
> - if (reloc->handle == bo->handle) {
> - return i;
> - }
>
> - /* Hash collision, look for the BO in the list of relocs linearly. */
> - for (i = csc->crelocs; i != 0;) {
> - --i;
> - reloc = &csc->relocs[i];
> - if (reloc->handle == bo->handle) {
> - /* Put this reloc in the hash list.
> - * This will prevent additional hash collisions if there are
> - * several consecutive get_reloc calls for the same buffer.
> - *
> - * Example: Assuming buffers A,B,C collide in the hash list,
> - * the following sequence of relocs:
> - * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
> - * will collide here: ^ and here: ^,
> - * meaning that we should get very few collisions in the end. */
> - csc->reloc_indices_hashlist[hash] = i;
> - /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
> - return i;
> + if (reloc->handle != bo->handle) {
> + /* Hash collision, look for the BO in the list of relocs linearly. */
> + for (i = csc->crelocs - 1; i >= 0; i--) {
> + reloc = &csc->relocs[i];
> + if (reloc->handle == bo->handle) {
> + /* Put this reloc in the hash list.
> + * This will prevent additional hash collisions if there are
> + * several consecutive get_reloc calls for the same buffer.
> + *
> + * Example: Assuming buffers A,B,C collide in the hash list,
> + * the following sequence of relocs:
> + * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
> + * will collide here: ^ and here: ^,
> + * meaning that we should get very few collisions in the end. */
> + csc->reloc_indices_hashlist[hash] = i;
> + break;
> + }
> }
> }
> }
> -
> - return -1;
> + if (out_reloc)
> + *out_reloc = reloc;
> + return i;
> }
>
> static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
> @@ -237,45 +236,28 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
> unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
> enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
> enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
> - bool update_hash = TRUE;
> - int i;
> + int i = -1;
>
> priority = MIN2(priority, 15);
> *added_domains = 0;
>
> - if (csc->is_handle_added[hash]) {
> - i = csc->reloc_indices_hashlist[hash];
> - reloc = &csc->relocs[i];
> -
> - if (reloc->handle != bo->handle) {
> - /* Hash collision, look for the BO in the list of relocs linearly. */
> - for (i = csc->crelocs - 1; i >= 0; i--) {
> - reloc = &csc->relocs[i];
> - if (reloc->handle == bo->handle) {
> - /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
> - break;
> - }
> - }
> - }
> -
> - if (i >= 0) {
> - update_reloc(reloc, rd, wd, priority, added_domains);
> -
> - /* For async DMA, every add_reloc call must add a buffer to the list
> - * no matter how many duplicates there are. This is due to the fact
> - * the DMA CS checker doesn't use NOP packets for offset patching,
> - * but always uses the i-th buffer from the list to patch the i-th
> - * offset. If there are N offsets in a DMA CS, there must also be N
> - * buffers in the relocation list.
> - *
> - * This doesn't have to be done if virtual memory is enabled,
> - * because there is no offset patching with virtual memory.
> - */
> - if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
> - csc->reloc_indices_hashlist[hash] = i;
> - return i;
> - }
> - update_hash = FALSE;
> + i = radeon_get_reloc(csc, bo, &reloc);
> +
> + if (i >= 0) {
> + update_reloc(reloc, rd, wd, priority, added_domains);
> +
> + /* For async DMA, every add_reloc call must add a buffer to the list
> + * no matter how many duplicates there are. This is due to the fact
> + * the DMA CS checker doesn't use NOP packets for offset patching,
> + * but always uses the i-th buffer from the list to patch the i-th
> + * offset. If there are N offsets in a DMA CS, there must also be N
> + * buffers in the relocation list.
> + *
> + * This doesn't have to be done if virtual memory is enabled,
> + * because there is no offset patching with virtual memory.
> + */
> + if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
> + return i;
> }
> }
>
> @@ -304,9 +286,7 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
> reloc->flags = priority;
>
> csc->is_handle_added[hash] = TRUE;
> - if (update_hash) {
> - csc->reloc_indices_hashlist[hash] = csc->crelocs;
> - }
> + csc->reloc_indices_hashlist[hash] = csc->crelocs;
>
> csc->chunks[1].length_dw += RELOC_DWORDS;
>
> @@ -384,7 +364,7 @@ static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
> {
> struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
> struct radeon_bo *bo = (struct radeon_bo*)buf;
> - unsigned index = radeon_get_reloc(cs->csc, bo);
> + unsigned index = radeon_get_reloc(cs->csc, bo, NULL);
>
> if (index == -1) {
> fprintf(stderr, "radeon: Cannot get a relocation in %s.\n", __func__);
> @@ -600,7 +580,7 @@ static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
> if (!bo->num_cs_references)
> return FALSE;
>
> - index = radeon_get_reloc(cs->csc, bo);
> + index = radeon_get_reloc(cs->csc, bo, NULL);
> if (index == -1)
> return FALSE;
>
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> index ebec161..460e9fa 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> @@ -80,7 +80,8 @@ struct radeon_drm_cs {
> struct radeon_bo *trace_buf;
> };
>
> -int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo);
> +int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo,
> + struct drm_radeon_cs_reloc **out_reloc);
>
> static INLINE struct radeon_drm_cs *
> radeon_drm_cs(struct radeon_winsys_cs *base)
> @@ -94,7 +95,7 @@ radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
> {
> int num_refs = bo->num_cs_references;
> return num_refs == bo->rws->num_cs ||
> - (num_refs && radeon_get_reloc(cs->csc, bo) != -1);
> + (num_refs && radeon_get_reloc(cs->csc, bo, NULL) != -1);
> }
>
> static INLINE boolean
> @@ -106,7 +107,7 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
> if (!bo->num_cs_references)
> return FALSE;
>
> - index = radeon_get_reloc(cs->csc, bo);
> + index = radeon_get_reloc(cs->csc, bo, NULL);
> if (index == -1)
> return FALSE;
>
More information about the mesa-dev
mailing list