[Mesa-dev] [PATCH 10/14] winsys/radeon: separate adding a buffer from updating its reloc data

Nicolai Hähnle nhaehnle at gmail.com
Tue Sep 13 09:56:21 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 52 +++++++++------------------
 1 file changed, 17 insertions(+), 35 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index a3b03be..20f90cf 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -182,33 +182,20 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
     cs->csc = &cs->csc1;
     cs->cst = &cs->csc2;
     cs->base.current.buf = cs->csc->buf;
     cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
     cs->ring_type = ring_type;
 
     p_atomic_inc(&ws->num_cs);
     return &cs->base;
 }
 
-static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
-                                enum radeon_bo_domain rd,
-                                enum radeon_bo_domain wd,
-                                unsigned priority,
-                                enum radeon_bo_domain *added_domains)
-{
-    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
-
-    reloc->read_domains |= rd;
-    reloc->write_domain |= wd;
-    reloc->flags = MAX2(reloc->flags, priority);
-}
-
 int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
 {
     unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
     int i = csc->reloc_indices_hashlist[hash];
 
     /* not found or found */
     if (i == -1 || csc->relocs_bo[i].bo == bo)
         return i;
 
     /* Hash collision, look for the BO in the list of relocs linearly. */
@@ -223,44 +210,31 @@ int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
              *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
              * will collide here: ^ and here:   ^,
              * meaning that we should get very few collisions in the end. */
             csc->reloc_indices_hashlist[hash] = i;
             return i;
         }
     }
     return -1;
 }
 
-static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
-                                 struct radeon_bo *bo,
-                                 enum radeon_bo_usage usage,
-                                 enum radeon_bo_domain domains,
-                                 unsigned priority,
-                                 enum radeon_bo_domain *added_domains)
+static unsigned radeon_lookup_or_add_buffer(struct radeon_drm_cs *cs,
+                                            struct radeon_bo *bo)
 {
     struct radeon_cs_context *csc = cs->csc;
     struct drm_radeon_cs_reloc *reloc;
     unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
-    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
-    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
     int i = -1;
 
-    assert(priority < 64);
-    *added_domains = 0;
-
     i = radeon_lookup_buffer(csc, bo);
 
     if (i >= 0) {
-        reloc = &csc->relocs[i];
-        update_reloc(reloc, rd, wd, priority / 4, added_domains);
-        csc->relocs_bo[i].priority_usage |= 1llu << priority;
-
         /* For async DMA, every add_buffer call must add a buffer to the list
          * no matter how many duplicates there are. This is due to the fact
          * the DMA CS checker doesn't use NOP packets for offset patching,
          * but always uses the i-th buffer from the list to patch the i-th
          * offset. If there are N offsets in a DMA CS, there must also be N
          * buffers in the relocation list.
          *
          * This doesn't have to be done if virtual memory is enabled,
          * because there is no offset patching with virtual memory.
          */
@@ -278,48 +252,56 @@ static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
         csc->relocs_bo = realloc(csc->relocs_bo, size);
 
         size = csc->max_relocs * sizeof(struct drm_radeon_cs_reloc);
         csc->relocs = realloc(csc->relocs, size);
 
         csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
     }
 
     /* Initialize the new relocation. */
     csc->relocs_bo[csc->num_relocs].bo = NULL;
-    csc->relocs_bo[csc->num_relocs].priority_usage = 1llu << priority;
+    csc->relocs_bo[csc->num_relocs].priority_usage = 0;
     radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo);
     p_atomic_inc(&bo->num_cs_references);
     reloc = &csc->relocs[csc->num_relocs];
     reloc->handle = bo->handle;
-    reloc->read_domains = rd;
-    reloc->write_domain = wd;
-    reloc->flags = priority / 4;
+    reloc->read_domains = 0;
+    reloc->write_domain = 0;
+    reloc->flags = 0;
 
     csc->reloc_indices_hashlist[hash] = csc->num_relocs;
 
     csc->chunks[1].length_dw += RELOC_DWORDS;
 
-    *added_domains = rd | wd;
     return csc->num_relocs++;
 }
 
 static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                         struct pb_buffer *buf,
                                         enum radeon_bo_usage usage,
                                         enum radeon_bo_domain domains,
                                         enum radeon_bo_priority priority)
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
     struct radeon_bo *bo = (struct radeon_bo*)buf;
     enum radeon_bo_domain added_domains;
-    unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
-                                       &added_domains);
+    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
+    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
+    struct drm_radeon_cs_reloc *reloc;
+    unsigned index = radeon_lookup_or_add_buffer(cs, bo);
+
+    reloc = &cs->csc->relocs[index];
+    added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
+    reloc->read_domains |= rd;
+    reloc->write_domain |= wd;
+    reloc->flags = MAX2(reloc->flags, priority);
+    cs->csc->relocs_bo[index].priority_usage |= 1llu << priority;
 
     if (added_domains & RADEON_DOMAIN_VRAM)
         cs->base.used_vram += bo->base.size;
     else if (added_domains & RADEON_DOMAIN_GTT)
         cs->base.used_gart += bo->base.size;
 
     return index;
 }
 
 static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
-- 
2.7.4



More information about the mesa-dev mailing list