[Mesa-dev] [PATCH] winsys/radeon: consolidate hash table lookup
Marek Olšák
maraeo at gmail.com
Sat Apr 12 03:52:38 PDT 2014
Do you mean having bo->cs and bo->index? That would work, but there
would have to be mutex_lock and mutex_unlock when accessing the
variables. I'm not sure if locking a mutex is more expensive than the
hash table lookup. OpenGL with GLX allows sharing buffers and textures
between contexts, so we must play safe.
Note that if all handles are less than 512, the lookup is always in
O(1), so it's very cheap. We can increase the number to 1024 or 2048
if we find out that apps use too many buffers.
Marek
On Sat, Apr 12, 2014 at 10:35 AM, Christian König
<deathsimple at vodafone.de> wrote:
> Am 11.04.2014 19:03, schrieb Marek Olšák:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>
>
> Reviewed-by: Christian König <christian.koenig at amd.com>
>
> BTW:
>
> I've always wondered if the custom hash table is the best approach here.
> Having a BO active in more than one command submission context at the same
> time sounds rather unlikely to me.
>
> Something like storing a pointer to the last used CS and it's index in the
> BO and only when a BO is really used in more than one CS context at the same
> time fall back to the hashtable and lockup the index from there sounds like
> less overhead.
>
>
>>
>> I should have done this long ago.
>> ---
>> src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 110
>> +++++++++++---------------
>> src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 7 +-
>> 2 files changed, 49 insertions(+), 68 deletions(-)
>>
>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>> b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>> index db9fbfa..b55eb80 100644
>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
>> @@ -188,41 +188,40 @@ static INLINE void update_reloc(struct
>> drm_radeon_cs_reloc *reloc,
>> reloc->flags = MAX2(reloc->flags, priority);
>> }
>> -int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo
>> *bo)
>> +int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo,
>> + struct drm_radeon_cs_reloc **out_reloc)
>> {
>> - struct drm_radeon_cs_reloc *reloc;
>> - unsigned i;
>> + struct drm_radeon_cs_reloc *reloc = NULL;
>> unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
>> + int i = -1;
>> if (csc->is_handle_added[hash]) {
>> i = csc->reloc_indices_hashlist[hash];
>> reloc = &csc->relocs[i];
>> - if (reloc->handle == bo->handle) {
>> - return i;
>> - }
>> - /* Hash collision, look for the BO in the list of relocs
>> linearly. */
>> - for (i = csc->crelocs; i != 0;) {
>> - --i;
>> - reloc = &csc->relocs[i];
>> - if (reloc->handle == bo->handle) {
>> - /* Put this reloc in the hash list.
>> - * This will prevent additional hash collisions if there
>> are
>> - * several consecutive get_reloc calls for the same
>> buffer.
>> - *
>> - * Example: Assuming buffers A,B,C collide in the hash
>> list,
>> - * the following sequence of relocs:
>> - * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
>> - * will collide here: ^ and here: ^,
>> - * meaning that we should get very few collisions in the
>> end. */
>> - csc->reloc_indices_hashlist[hash] = i;
>> - /*printf("write_reloc collision, hash: %i, handle: %i\n",
>> hash, bo->handle);*/
>> - return i;
>> + if (reloc->handle != bo->handle) {
>> + /* Hash collision, look for the BO in the list of relocs
>> linearly. */
>> + for (i = csc->crelocs - 1; i >= 0; i--) {
>> + reloc = &csc->relocs[i];
>> + if (reloc->handle == bo->handle) {
>> + /* Put this reloc in the hash list.
>> + * This will prevent additional hash collisions if
>> there are
>> + * several consecutive get_reloc calls for the same
>> buffer.
>> + *
>> + * Example: Assuming buffers A,B,C collide in the
>> hash list,
>> + * the following sequence of relocs:
>> + * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
>> + * will collide here: ^ and here: ^,
>> + * meaning that we should get very few collisions in
>> the end. */
>> + csc->reloc_indices_hashlist[hash] = i;
>> + break;
>> + }
>> }
>> }
>> }
>> -
>> - return -1;
>> + if (out_reloc)
>> + *out_reloc = reloc;
>> + return i;
>> }
>> static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
>> @@ -237,45 +236,28 @@ static unsigned radeon_add_reloc(struct
>> radeon_drm_cs *cs,
>> unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
>> enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
>> enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
>> - bool update_hash = TRUE;
>> - int i;
>> + int i = -1;
>> priority = MIN2(priority, 15);
>> *added_domains = 0;
>> - if (csc->is_handle_added[hash]) {
>> - i = csc->reloc_indices_hashlist[hash];
>> - reloc = &csc->relocs[i];
>> -
>> - if (reloc->handle != bo->handle) {
>> - /* Hash collision, look for the BO in the list of relocs
>> linearly. */
>> - for (i = csc->crelocs - 1; i >= 0; i--) {
>> - reloc = &csc->relocs[i];
>> - if (reloc->handle == bo->handle) {
>> - /*printf("write_reloc collision, hash: %i, handle:
>> %i\n", hash, bo->handle);*/
>> - break;
>> - }
>> - }
>> - }
>> -
>> - if (i >= 0) {
>> - update_reloc(reloc, rd, wd, priority, added_domains);
>> -
>> - /* For async DMA, every add_reloc call must add a buffer to
>> the list
>> - * no matter how many duplicates there are. This is due to
>> the fact
>> - * the DMA CS checker doesn't use NOP packets for offset
>> patching,
>> - * but always uses the i-th buffer from the list to patch the
>> i-th
>> - * offset. If there are N offsets in a DMA CS, there must
>> also be N
>> - * buffers in the relocation list.
>> - *
>> - * This doesn't have to be done if virtual memory is enabled,
>> - * because there is no offset patching with virtual memory.
>> - */
>> - if (cs->base.ring_type != RING_DMA ||
>> cs->ws->info.r600_virtual_address) {
>> - csc->reloc_indices_hashlist[hash] = i;
>> - return i;
>> - }
>> - update_hash = FALSE;
>> + i = radeon_get_reloc(csc, bo, &reloc);
>> +
>> + if (i >= 0) {
>> + update_reloc(reloc, rd, wd, priority, added_domains);
>> +
>> + /* For async DMA, every add_reloc call must add a buffer to the
>> list
>> + * no matter how many duplicates there are. This is due to the
>> fact
>> + * the DMA CS checker doesn't use NOP packets for offset
>> patching,
>> + * but always uses the i-th buffer from the list to patch the
>> i-th
>> + * offset. If there are N offsets in a DMA CS, there must also be
>> N
>> + * buffers in the relocation list.
>> + *
>> + * This doesn't have to be done if virtual memory is enabled,
>> + * because there is no offset patching with virtual memory.
>> + */
>> + if (cs->base.ring_type != RING_DMA ||
>> cs->ws->info.r600_virtual_address) {
>> + return i;
>> }
>> }
>> @@ -304,9 +286,7 @@ static unsigned radeon_add_reloc(struct
>> radeon_drm_cs *cs,
>> reloc->flags = priority;
>> csc->is_handle_added[hash] = TRUE;
>> - if (update_hash) {
>> - csc->reloc_indices_hashlist[hash] = csc->crelocs;
>> - }
>> + csc->reloc_indices_hashlist[hash] = csc->crelocs;
>> csc->chunks[1].length_dw += RELOC_DWORDS;
>> @@ -384,7 +364,7 @@ static void radeon_drm_cs_write_reloc(struct
>> radeon_winsys_cs *rcs,
>> {
>> struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
>> struct radeon_bo *bo = (struct radeon_bo*)buf;
>> - unsigned index = radeon_get_reloc(cs->csc, bo);
>> + unsigned index = radeon_get_reloc(cs->csc, bo, NULL);
>> if (index == -1) {
>> fprintf(stderr, "radeon: Cannot get a relocation in %s.\n",
>> __func__);
>> @@ -600,7 +580,7 @@ static boolean radeon_bo_is_referenced(struct
>> radeon_winsys_cs *rcs,
>> if (!bo->num_cs_references)
>> return FALSE;
>> - index = radeon_get_reloc(cs->csc, bo);
>> + index = radeon_get_reloc(cs->csc, bo, NULL);
>> if (index == -1)
>> return FALSE;
>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
>> b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
>> index ebec161..460e9fa 100644
>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
>> @@ -80,7 +80,8 @@ struct radeon_drm_cs {
>> struct radeon_bo *trace_buf;
>> };
>> -int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo
>> *bo);
>> +int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo,
>> + struct drm_radeon_cs_reloc **out_reloc);
>> static INLINE struct radeon_drm_cs *
>> radeon_drm_cs(struct radeon_winsys_cs *base)
>> @@ -94,7 +95,7 @@ radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
>> {
>> int num_refs = bo->num_cs_references;
>> return num_refs == bo->rws->num_cs ||
>> - (num_refs && radeon_get_reloc(cs->csc, bo) != -1);
>> + (num_refs && radeon_get_reloc(cs->csc, bo, NULL) != -1);
>> }
>> static INLINE boolean
>> @@ -106,7 +107,7 @@ radeon_bo_is_referenced_by_cs_for_write(struct
>> radeon_drm_cs *cs,
>> if (!bo->num_cs_references)
>> return FALSE;
>> - index = radeon_get_reloc(cs->csc, bo);
>> + index = radeon_get_reloc(cs->csc, bo, NULL);
>> if (index == -1)
>> return FALSE;
>>
>
>
More information about the mesa-dev
mailing list