<div dir="ltr"><div>Reviewed-by: Marek Olšák <<a href="mailto:marek.olsak@amd.com">marek.olsak@amd.com</a>></div><div><br></div><div>Marek<br></div></div><br><div class="gmail_quote"><div dir="ltr">On Wed, Nov 21, 2018 at 12:57 PM Nicolai Hähnle <<a href="mailto:nhaehnle@gmail.com">nhaehnle@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Nicolai Hähnle <<a href="mailto:nicolai.haehnle@amd.com" target="_blank">nicolai.haehnle@amd.com</a>><br>
<br>
We'll use the new per-BO lock in the upcoming mapping change. Sparse buffers
have always had one (the commit_lock).
---<br>
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 19 +++++++++++++------<br>
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.h |  4 ++--<br>
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 10 +++++-----<br>
 3 files changed, 20 insertions(+), 13 deletions(-)<br>
<br>
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c<br>
index f49fb47b80e..9f0d4c12482 100644<br>
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c<br>
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c<br>
@@ -196,20 +196,21 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)<br>
       ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size);<br>
<br>
    if (bo->u.real.map_count >= 1) {<br>
       if (bo->initial_domain & RADEON_DOMAIN_VRAM)<br>
          ws->mapped_vram -= bo->base.size;<br>
       else if (bo->initial_domain & RADEON_DOMAIN_GTT)<br>
          ws->mapped_gtt -= bo->base.size;<br>
       ws->num_mapped_buffers--;<br>
    }<br>
<br>
+   simple_mtx_destroy(&bo->lock);<br>
    FREE(bo);<br>
 }<br>
<br>
 static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)<br>
 {<br>
    struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);<br>
<br>
    assert(bo->bo); /* slab buffers have a separate vtbl */<br>
<br>
    if (bo->u.real.use_reusable_pool)<br>
@@ -461,20 +462,21 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,<br>
                        AMDGPU_VM_PAGE_EXECUTABLE;<br>
<br>
    if (!(flags & RADEON_FLAG_READ_ONLY))<br>
        vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;<br>
<br>
    r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,<br>
                           AMDGPU_VA_OP_MAP);<br>
    if (r)<br>
       goto error_va_map;<br>
<br>
+   simple_mtx_init(&bo->lock, mtx_plain);<br>
    pipe_reference_init(&bo->base.reference, 1);<br>
    bo->base.alignment = alignment;<br>
    bo->base.usage = 0;<br>
    bo->base.size = size;<br>
    bo->base.vtbl = &amdgpu_winsys_bo_vtbl;<br>
    bo->ws = ws;<br>
    bo->bo = buf_handle;<br>
    bo->va = va;<br>
    bo->u.real.va_handle = va_handle;<br>
    bo->initial_domain = initial_domain;<br>
@@ -564,20 +566,21 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,<br>
    if (!slab->entries)<br>
       goto fail_buffer;<br>
<br>
    LIST_INITHEAD(&slab->base.free);<br>
<br>
    base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries);<br>
<br>
    for (unsigned i = 0; i < slab->base.num_entries; ++i) {<br>
       struct amdgpu_winsys_bo *bo = &slab->entries[i];<br>
<br>
+      simple_mtx_init(&bo->lock, mtx_plain);<br>
       bo->base.alignment = entry_size;<br>
       bo->base.usage = slab->buffer->base.usage;<br>
       bo->base.size = entry_size;<br>
       bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl;<br>
       bo->ws = ws;<br>
       bo->va = slab->buffer->va + i * entry_size;<br>
       bo->initial_domain = domains;<br>
       bo->unique_id = base_id + i;<br>
       bo->u.slab.entry.slab = &slab->base;<br>
       bo->u.slab.entry.group_index = group_index;<br>
@@ -592,22 +595,24 @@ fail_buffer:<br>
    amdgpu_winsys_bo_reference(&slab->buffer, NULL);<br>
 fail:<br>
    FREE(slab);<br>
    return NULL;<br>
 }<br>
<br>
 void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)<br>
 {<br>
    struct amdgpu_slab *slab = amdgpu_slab(pslab);<br>
<br>
-   for (unsigned i = 0; i < slab->base.num_entries; ++i)<br>
+   for (unsigned i = 0; i < slab->base.num_entries; ++i) {<br>
       amdgpu_bo_remove_fences(&slab->entries[i]);<br>
+      simple_mtx_destroy(&slab->entries[i].lock);<br>
+   }<br>
<br>
    FREE(slab->entries);<br>
    amdgpu_winsys_bo_reference(&slab->buffer, NULL);<br>
    FREE(slab);<br>
 }<br>
<br>
 #if DEBUG_SPARSE_COMMITS<br>
 static void<br>
 sparse_dump(struct amdgpu_winsys_bo *bo, const char *func)<br>
 {<br>
@@ -851,22 +856,22 @@ static void amdgpu_bo_sparse_destroy(struct pb_buffer *_buf)<br>
    }<br>
<br>
    while (!list_empty(&bo->u.sparse.backing)) {<br>
       struct amdgpu_sparse_backing *dummy = NULL;<br>
       sparse_free_backing_buffer(bo,<br>
                                  container_of(bo->u.sparse.backing.next,<br>
                                               dummy, list));<br>
    }<br>
<br>
    amdgpu_va_range_free(bo->u.sparse.va_handle);<br>
-   simple_mtx_destroy(&bo->u.sparse.commit_lock);<br>
    FREE(bo->u.sparse.commitments);<br>
+   simple_mtx_destroy(&bo->lock);<br>
    FREE(bo);<br>
 }<br>
<br>
 static const struct pb_vtbl amdgpu_winsys_bo_sparse_vtbl = {<br>
    amdgpu_bo_sparse_destroy<br>
    /* other functions are never called */<br>
 };<br>
<br>
 static struct pb_buffer *<br>
 amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,<br>
@@ -882,37 +887,37 @@ amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,<br>
     * that exceed this limit. This is not really a restriction: we don't have<br>
     * that much virtual address space anyway.<br>
     */<br>
    if (size > (uint64_t)INT32_MAX * RADEON_SPARSE_PAGE_SIZE)<br>
       return NULL;<br>
<br>
    bo = CALLOC_STRUCT(amdgpu_winsys_bo);<br>
    if (!bo)<br>
       return NULL;<br>
<br>
+   simple_mtx_init(&bo->lock, mtx_plain);<br>
    pipe_reference_init(&bo->base.reference, 1);<br>
    bo->base.alignment = RADEON_SPARSE_PAGE_SIZE;<br>
    bo->base.size = size;<br>
    bo->base.vtbl = &amdgpu_winsys_bo_sparse_vtbl;<br>
    bo->ws = ws;<br>
    bo->initial_domain = domain;<br>
    bo->unique_id =  __sync_fetch_and_add(&ws->next_bo_unique_id, 1);<br>
    bo->sparse = true;<br>
    bo->u.sparse.flags = flags & ~RADEON_FLAG_SPARSE;<br>
<br>
    bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);<br>
    bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages,<br>
                                      sizeof(*bo->u.sparse.commitments));<br>
    if (!bo->u.sparse.commitments)<br>
       goto error_alloc_commitments;<br>
<br>
-   simple_mtx_init(&bo->u.sparse.commit_lock, mtx_plain);<br>
    LIST_INITHEAD(&bo->u.sparse.backing);<br>
<br>
    /* For simplicity, we always map a multiple of the page size. */<br>
    map_size = align64(size, RADEON_SPARSE_PAGE_SIZE);<br>
    va_gap_size = ws->check_vm ? 4 * RADEON_SPARSE_PAGE_SIZE : 0;<br>
    r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,<br>
                              map_size + va_gap_size, RADEON_SPARSE_PAGE_SIZE,<br>
                              0, &bo->va, &bo->u.sparse.va_handle,<br>
                             AMDGPU_VA_RANGE_HIGH);<br>
    if (r)<br>
@@ -921,23 +926,23 @@ amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,<br>
    r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0, size, bo->va,<br>
                            AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);<br>
    if (r)<br>
       goto error_va_map;<br>
<br>
    return &bo->base;<br>
<br>
 error_va_map:<br>
    amdgpu_va_range_free(bo->u.sparse.va_handle);<br>
 error_va_alloc:<br>
-   simple_mtx_destroy(&bo->u.sparse.commit_lock);<br>
    FREE(bo->u.sparse.commitments);<br>
 error_alloc_commitments:<br>
+   simple_mtx_destroy(&bo->lock);<br>
    FREE(bo);<br>
    return NULL;<br>
 }<br>
<br>
 static bool<br>
 amdgpu_bo_sparse_commit(struct pb_buffer *buf, uint64_t offset, uint64_t size,<br>
                         bool commit)<br>
 {<br>
    struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buf);<br>
    struct amdgpu_sparse_commitment *comm;<br>
@@ -948,21 +953,21 @@ amdgpu_bo_sparse_commit(struct pb_buffer *buf, uint64_t offset, uint64_t size,<br>
    assert(bo->sparse);<br>
    assert(offset % RADEON_SPARSE_PAGE_SIZE == 0);<br>
    assert(offset <= bo->base.size);<br>
    assert(size <= bo->base.size - offset);<br>
    assert(size % RADEON_SPARSE_PAGE_SIZE == 0 || offset + size == bo->base.size);<br>
<br>
    comm = bo->u.sparse.commitments;<br>
    va_page = offset / RADEON_SPARSE_PAGE_SIZE;<br>
    end_va_page = va_page + DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);<br>
<br>
-   simple_mtx_lock(&bo->u.sparse.commit_lock);<br>
+   simple_mtx_lock(&bo->lock);<br>
<br>
 #if DEBUG_SPARSE_COMMITS<br>
    sparse_dump(bo, __func__);<br>
 #endif<br>
<br>
    if (commit) {<br>
       while (va_page < end_va_page) {<br>
          uint32_t span_va_page;<br>
<br>
          /* Skip pages that are already committed. */<br>
@@ -1052,21 +1057,21 @@ amdgpu_bo_sparse_commit(struct pb_buffer *buf, uint64_t offset, uint64_t size,<br>
<br>
          if (!sparse_backing_free(bo, backing, backing_start, span_pages)) {<br>
             /* Couldn't allocate tracking data structures, so we have to leak */<br>
             fprintf(stderr, "amdgpu: leaking PRT backing memory\n");<br>
             ok = false;<br>
          }<br>
       }<br>
    }<br>
 out:<br>
<br>
-   simple_mtx_unlock(&bo->u.sparse.commit_lock);<br>
+   simple_mtx_unlock(&bo->lock);<br>
<br>
    return ok;<br>
 }<br>
<br>
 static unsigned eg_tile_split(unsigned tile_split)<br>
 {<br>
    switch (tile_split) {<br>
    case 0:     tile_split = 64;    break;<br>
    case 1:     tile_split = 128;   break;<br>
    case 2:     tile_split = 256;   break;<br>
@@ -1331,20 +1336,21 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,<br>
    r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);<br>
    if (r)<br>
       goto error;<br>
<br>
    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)<br>
       initial |= RADEON_DOMAIN_VRAM;<br>
    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)<br>
       initial |= RADEON_DOMAIN_GTT;<br>
<br>
    /* Initialize the structure. */<br>
+   simple_mtx_init(&bo->lock, mtx_plain);<br>
    pipe_reference_init(&bo->base.reference, 1);<br>
    bo->base.alignment = info.phys_alignment;<br>
    bo->bo = result.buf_handle;<br>
    bo->base.size = result.alloc_size;<br>
    bo->base.vtbl = &amdgpu_winsys_bo_vtbl;<br>
    bo->ws = ws;<br>
    bo->va = va;<br>
    bo->u.real.va_handle = va_handle;<br>
    bo->initial_domain = initial;<br>
    bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);<br>
@@ -1441,20 +1447,21 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,<br>
     if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,<br>
                               aligned_size, 1 << 12, 0, &va, &va_handle,<br>
                              AMDGPU_VA_RANGE_HIGH))<br>
         goto error_va_alloc;<br>
<br>
     if (amdgpu_bo_va_op(buf_handle, 0, aligned_size, va, 0, AMDGPU_VA_OP_MAP))<br>
         goto error_va_map;<br>
<br>
     /* Initialize it. */<br>
     pipe_reference_init(&bo->base.reference, 1);<br>
+    simple_mtx_init(&bo->lock, mtx_plain);<br>
     bo->bo = buf_handle;<br>
     bo->base.alignment = 0;<br>
     bo->base.size = size;<br>
     bo->base.vtbl = &amdgpu_winsys_bo_vtbl;<br>
     bo->ws = ws;<br>
     bo->user_ptr = pointer;<br>
     bo->va = va;<br>
     bo->u.real.va_handle = va_handle;<br>
     bo->initial_domain = RADEON_DOMAIN_GTT;<br>
     bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);<br>
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h<br>
index 1e07e4734aa..58e6eed733d 100644<br>
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h<br>
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h<br>
@@ -67,39 +67,39 @@ struct amdgpu_winsys_bo {<br>
<br>
          struct list_head global_list_item;<br>
<br>
          uint32_t kms_handle;<br>
       } real;<br>
       struct {<br>
          struct pb_slab_entry entry;<br>
          struct amdgpu_winsys_bo *real;<br>
       } slab;<br>
       struct {<br>
-         simple_mtx_t commit_lock;<br>
          amdgpu_va_handle va_handle;<br>
          enum radeon_bo_flag flags;<br>
<br>
          uint32_t num_va_pages;<br>
          uint32_t num_backing_pages;<br>
<br>
          struct list_head backing;<br>
<br>
          /* Commitment information for each page of the virtual memory area. */<br>
          struct amdgpu_sparse_commitment *commitments;<br>
       } sparse;<br>
    } u;<br>
<br>
    struct amdgpu_winsys *ws;<br>
    void *user_ptr; /* from buffer_from_ptr */<br>
<br>
    amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */<br>
    bool sparse;<br>
+   bool is_local;<br>
    uint32_t unique_id;<br>
    uint64_t va;<br>
    enum radeon_bo_domain initial_domain;<br>
<br>
    /* how many command streams is this bo referenced in? */<br>
    int num_cs_references;<br>
<br>
    /* how many command streams, which are being emitted in a separate<br>
     * thread, is this bo referenced in? */<br>
    volatile int num_active_ioctls;<br>
@@ -107,21 +107,21 @@ struct amdgpu_winsys_bo {<br>
    /* whether buffer_get_handle or buffer_from_handle was called,<br>
     * it can only transition from false to true<br>
     */<br>
    volatile int is_shared; /* bool (int for atomicity) */<br>
<br>
    /* Fences for buffer synchronization. */<br>
    unsigned num_fences;<br>
    unsigned max_fences;<br>
    struct pipe_fence_handle **fences;<br>
<br>
-   bool is_local;<br>
+   simple_mtx_t lock;<br>
 };<br>
<br>
 struct amdgpu_slab {<br>
    struct pb_slab base;<br>
    struct amdgpu_winsys_bo *buffer;<br>
    struct amdgpu_winsys_bo *entries;<br>
 };<br>
<br>
 bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf);<br>
 void amdgpu_bo_destroy(struct pb_buffer *_buf);<br>
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c<br>
index 5ec3b470a15..9e4de7779e2 100644<br>
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c<br>
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c<br>
@@ -591,30 +591,30 @@ static int amdgpu_lookup_or_add_sparse_buffer(struct amdgpu_cs *acs,<br>
    amdgpu_winsys_bo_reference(&buffer->bo, bo);<br>
    p_atomic_inc(&bo->num_cs_references);<br>
    cs->num_sparse_buffers++;<br>
<br>
    hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist)-1);<br>
    cs->buffer_indices_hashlist[hash] = idx;<br>
<br>
    /* We delay adding the backing buffers until we really have to. However,<br>
     * we cannot delay accounting for memory use.<br>
     */<br>
-   simple_mtx_lock(&bo->u.sparse.commit_lock);<br>
+   simple_mtx_lock(&bo->lock);<br>
<br>
    list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {<br>
       if (bo->initial_domain & RADEON_DOMAIN_VRAM)<br>
          acs->main.base.used_vram += backing->bo->base.size;<br>
       else if (bo->initial_domain & RADEON_DOMAIN_GTT)<br>
          acs->main.base.used_gart += backing->bo->base.size;<br>
    }<br>
<br>
-   simple_mtx_unlock(&bo->u.sparse.commit_lock);<br>
+   simple_mtx_unlock(&bo->lock);<br>
<br>
    return idx;<br>
 }<br>
<br>
 static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,<br>
                                     struct pb_buffer *buf,<br>
                                     enum radeon_bo_usage usage,<br>
                                     enum radeon_bo_domain domains,<br>
                                     enum radeon_bo_priority priority)<br>
 {<br>
@@ -1260,39 +1260,39 @@ static void amdgpu_cs_add_syncobj_signal(struct radeon_cmdbuf *rws,<br>
  *<br>
  * This is done late, during submission, to keep the buffer list short before<br>
  * submit, and to avoid managing fences for the backing buffers.<br>
  */<br>
 static bool amdgpu_add_sparse_backing_buffers(struct amdgpu_cs_context *cs)<br>
 {<br>
    for (unsigned i = 0; i < cs->num_sparse_buffers; ++i) {<br>
       struct amdgpu_cs_buffer *buffer = &cs->sparse_buffers[i];<br>
       struct amdgpu_winsys_bo *bo = buffer->bo;<br>
<br>
-      simple_mtx_lock(&bo->u.sparse.commit_lock);<br>
+      simple_mtx_lock(&bo->lock);<br>
<br>
       list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {<br>
          /* We can directly add the buffer here, because we know that each<br>
           * backing buffer occurs only once.<br>
           */<br>
          int idx = amdgpu_do_add_real_buffer(cs, backing->bo);<br>
          if (idx < 0) {<br>
             fprintf(stderr, "%s: failed to add buffer\n", __FUNCTION__);<br>
-            simple_mtx_unlock(&bo->u.sparse.commit_lock);<br>
+            simple_mtx_unlock(&bo->lock);<br>
             return false;<br>
          }<br>
<br>
          cs->real_buffers[idx].usage = buffer->usage & ~RADEON_USAGE_SYNCHRONIZED;<br>
          cs->real_buffers[idx].u.real.priority_usage = buffer->u.real.priority_usage;<br>
          p_atomic_inc(&backing->bo->num_active_ioctls);<br>
       }<br>
<br>
-      simple_mtx_unlock(&bo->u.sparse.commit_lock);<br>
+      simple_mtx_unlock(&bo->lock);<br>
    }<br>
<br>
    return true;<br>
 }<br>
<br>
 void amdgpu_cs_submit_ib(void *job, int thread_index)<br>
 {<br>
    struct amdgpu_cs *acs = (struct amdgpu_cs*)job;<br>
    struct amdgpu_winsys *ws = acs->ctx->ws;<br>
    struct amdgpu_cs_context *cs = acs->cst;<br>
-- <br>
2.19.1<br>
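
For context on what the new per-BO lock is meant for, here is a minimal
standalone sketch of the locking pattern it enables. It assumes (as an
illustration only, not the actual follow-up patch) that the upcoming mapping
change will guard bo->u.real.map_count and the mapped_vram/mapped_gtt
accounting with bo->lock; the toy code uses pthreads instead of simple_mtx_t
so it builds outside the Mesa tree.

/* Standalone sketch (not Mesa code): the per-object locking pattern that a
 * per-BO lock enables.  In the real winsys the lock is bo->lock
 * (simple_mtx_t); pthreads is used here only so the example compiles on its
 * own.  Guarding map_count like this is an assumption about the upcoming
 * mapping change, not something taken from this patch. */
#include <pthread.h>
#include <stdio.h>

struct toy_bo {
   pthread_mutex_t lock; /* stands in for simple_mtx_t bo->lock */
   int map_count;        /* stands in for bo->u.real.map_count */
};

static void toy_bo_init(struct toy_bo *bo)
{
   /* cf. simple_mtx_init(&bo->lock, mtx_plain) in amdgpu_create_bo */
   pthread_mutex_init(&bo->lock, NULL);
   bo->map_count = 0;
}

static void toy_bo_map(struct toy_bo *bo)
{
   pthread_mutex_lock(&bo->lock);
   if (bo->map_count++ == 0) {
      /* First mapping: account the buffer as mapped exactly once, even if
       * several threads map it concurrently. */
      printf("first map: update mapped_vram/mapped_gtt here\n");
   }
   pthread_mutex_unlock(&bo->lock);
}

static void toy_bo_unmap(struct toy_bo *bo)
{
   pthread_mutex_lock(&bo->lock);
   if (--bo->map_count == 0) {
      /* Last unmap: undo the accounting. */
      printf("last unmap: update mapped_vram/mapped_gtt here\n");
   }
   pthread_mutex_unlock(&bo->lock);
}

static void toy_bo_destroy(struct toy_bo *bo)
{
   /* cf. simple_mtx_destroy(&bo->lock) in amdgpu_bo_destroy */
   pthread_mutex_destroy(&bo->lock);
}

int main(void)
{
   struct toy_bo bo;
   toy_bo_init(&bo);
   toy_bo_map(&bo);
   toy_bo_map(&bo);   /* nested map: no double accounting */
   toy_bo_unmap(&bo);
   toy_bo_unmap(&bo);
   toy_bo_destroy(&bo);
   return 0;
}

In the winsys itself the same pattern would use simple_mtx_lock(&bo->lock) and
simple_mtx_unlock(&bo->lock), matching the init/destroy calls this patch adds.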