[Mesa-dev] [PATCH 10/14] winsys/radeon: separate adding a buffer from updating its reloc data
Nicolai Hähnle
nhaehnle at gmail.com
Tue Sep 13 09:56:21 UTC 2016
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 52 +++++++++------------------
1 file changed, 17 insertions(+), 35 deletions(-)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index a3b03be..20f90cf 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -182,33 +182,20 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
cs->csc = &cs->csc1;
cs->cst = &cs->csc2;
cs->base.current.buf = cs->csc->buf;
cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
cs->ring_type = ring_type;
p_atomic_inc(&ws->num_cs);
return &cs->base;
}
-static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
- enum radeon_bo_domain rd,
- enum radeon_bo_domain wd,
- unsigned priority,
- enum radeon_bo_domain *added_domains)
-{
- *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
-
- reloc->read_domains |= rd;
- reloc->write_domain |= wd;
- reloc->flags = MAX2(reloc->flags, priority);
-}
-
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
int i = csc->reloc_indices_hashlist[hash];
/* not found or found */
if (i == -1 || csc->relocs_bo[i].bo == bo)
return i;
/* Hash collision, look for the BO in the list of relocs linearly. */
@@ -223,44 +210,31 @@ int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
* AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
* will collide here: ^ and here: ^,
* meaning that we should get very few collisions in the end. */
csc->reloc_indices_hashlist[hash] = i;
return i;
}
}
return -1;
}
-static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
- struct radeon_bo *bo,
- enum radeon_bo_usage usage,
- enum radeon_bo_domain domains,
- unsigned priority,
- enum radeon_bo_domain *added_domains)
+static unsigned radeon_lookup_or_add_buffer(struct radeon_drm_cs *cs,
+ struct radeon_bo *bo)
{
struct radeon_cs_context *csc = cs->csc;
struct drm_radeon_cs_reloc *reloc;
unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
- enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
- enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
int i = -1;
- assert(priority < 64);
- *added_domains = 0;
-
i = radeon_lookup_buffer(csc, bo);
if (i >= 0) {
- reloc = &csc->relocs[i];
- update_reloc(reloc, rd, wd, priority / 4, added_domains);
- csc->relocs_bo[i].priority_usage |= 1llu << priority;
-
/* For async DMA, every add_buffer call must add a buffer to the list
* no matter how many duplicates there are. This is due to the fact
* the DMA CS checker doesn't use NOP packets for offset patching,
* but always uses the i-th buffer from the list to patch the i-th
* offset. If there are N offsets in a DMA CS, there must also be N
* buffers in the relocation list.
*
* This doesn't have to be done if virtual memory is enabled,
* because there is no offset patching with virtual memory.
*/
@@ -278,48 +252,56 @@ static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
csc->relocs_bo = realloc(csc->relocs_bo, size);
size = csc->max_relocs * sizeof(struct drm_radeon_cs_reloc);
csc->relocs = realloc(csc->relocs, size);
csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
}
/* Initialize the new relocation. */
csc->relocs_bo[csc->num_relocs].bo = NULL;
- csc->relocs_bo[csc->num_relocs].priority_usage = 1llu << priority;
+ csc->relocs_bo[csc->num_relocs].priority_usage = 0;
radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo);
p_atomic_inc(&bo->num_cs_references);
reloc = &csc->relocs[csc->num_relocs];
reloc->handle = bo->handle;
- reloc->read_domains = rd;
- reloc->write_domain = wd;
- reloc->flags = priority / 4;
+ reloc->read_domains = 0;
+ reloc->write_domain = 0;
+ reloc->flags = 0;
csc->reloc_indices_hashlist[hash] = csc->num_relocs;
csc->chunks[1].length_dw += RELOC_DWORDS;
- *added_domains = rd | wd;
return csc->num_relocs++;
}
static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
struct pb_buffer *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
enum radeon_bo_priority priority)
{
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
struct radeon_bo *bo = (struct radeon_bo*)buf;
enum radeon_bo_domain added_domains;
- unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
- &added_domains);
+ enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
+ enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
+ struct drm_radeon_cs_reloc *reloc;
+ unsigned index = radeon_lookup_or_add_buffer(cs, bo);
+
+ reloc = &cs->csc->relocs[index];
+ added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
+ reloc->read_domains |= rd;
+ reloc->write_domain |= wd;
+ reloc->flags = MAX2(reloc->flags, priority / 4); /* flags keep the priority / 4 scaling used before this refactor */
+ cs->csc->relocs_bo[index].priority_usage |= 1llu << priority;
if (added_domains & RADEON_DOMAIN_VRAM)
cs->base.used_vram += bo->base.size;
else if (added_domains & RADEON_DOMAIN_GTT)
cs->base.used_gart += bo->base.size;
return index;
}
static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
--
2.7.4
More information about the mesa-dev
mailing list