[Mesa-dev] [PATCH 1/2] winsys/amdgpu: add amdgpu_winsys_bo::lock
Nicolai Hähnle
nhaehnle@gmail.com
Wed Nov 21 17:57:33 UTC 2018
From: Nicolai Hähnle <nicolai.haehnle@amd.com>
We'll use it in the upcoming mapping change. Sparse buffers have always
had such a lock (u.sparse.commit_lock); this moves it into the part of
the structure shared by all buffer types.
---
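Note (not part of the patch): as a rough illustration of where the
generalized lock is headed, the mapping path could take bo->lock around
the map-count/CPU-pointer bookkeeping. A minimal sketch, assuming the
usual amdgpu_bo.c context, a cached u.real.cpu_ptr field, and the
existing libdrm amdgpu_bo_cpu_map() entry point; names and control flow
are illustrative only and may differ from the actual follow-up change:

   /* Illustrative sketch only -- not part of this patch. */
   static void *sketch_bo_map_counted(struct amdgpu_winsys_bo *bo)
   {
      void *cpu = NULL;

      simple_mtx_lock(&bo->lock);
      if (!bo->u.real.cpu_ptr) {
         /* First mapping: ask libdrm for the CPU pointer while holding
          * the per-BO lock, so concurrent mappers don't race on
          * cpu_ptr/map_count.
          */
         if (amdgpu_bo_cpu_map(bo->bo, &cpu) != 0) {
            simple_mtx_unlock(&bo->lock);
            return NULL;
         }
         bo->u.real.cpu_ptr = cpu;
      }
      bo->u.real.map_count++;
      cpu = bo->u.real.cpu_ptr;
      simple_mtx_unlock(&bo->lock);
      return cpu;
   }

The point of hoisting the mutex out of u.sparse is exactly this: slab
and real buffers get the same per-BO lock that sparse buffers already
used for commitments, so the mapping change doesn't need a second mutex
or a global lock.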
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 19 +++++++++++++------
src/gallium/winsys/amdgpu/drm/amdgpu_bo.h | 4 ++--
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 10 +++++-----
3 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index f49fb47b80e..9f0d4c12482 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -196,20 +196,21 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size);
if (bo->u.real.map_count >= 1) {
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
ws->mapped_vram -= bo->base.size;
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
ws->mapped_gtt -= bo->base.size;
ws->num_mapped_buffers--;
}
+ simple_mtx_destroy(&bo->lock);
FREE(bo);
}
static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
{
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
assert(bo->bo); /* slab buffers have a separate vtbl */
if (bo->u.real.use_reusable_pool)
@@ -461,20 +462,21 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
AMDGPU_VM_PAGE_EXECUTABLE;
if (!(flags & RADEON_FLAG_READ_ONLY))
vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
AMDGPU_VA_OP_MAP);
if (r)
goto error_va_map;
+ simple_mtx_init(&bo->lock, mtx_plain);
pipe_reference_init(&bo->base.reference, 1);
bo->base.alignment = alignment;
bo->base.usage = 0;
bo->base.size = size;
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
bo->ws = ws;
bo->bo = buf_handle;
bo->va = va;
bo->u.real.va_handle = va_handle;
bo->initial_domain = initial_domain;
@@ -564,20 +566,21 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
if (!slab->entries)
goto fail_buffer;
LIST_INITHEAD(&slab->base.free);
base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries);
for (unsigned i = 0; i < slab->base.num_entries; ++i) {
struct amdgpu_winsys_bo *bo = &slab->entries[i];
+ simple_mtx_init(&bo->lock, mtx_plain);
bo->base.alignment = entry_size;
bo->base.usage = slab->buffer->base.usage;
bo->base.size = entry_size;
bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl;
bo->ws = ws;
bo->va = slab->buffer->va + i * entry_size;
bo->initial_domain = domains;
bo->unique_id = base_id + i;
bo->u.slab.entry.slab = &slab->base;
bo->u.slab.entry.group_index = group_index;
@@ -592,22 +595,24 @@ fail_buffer:
amdgpu_winsys_bo_reference(&slab->buffer, NULL);
fail:
FREE(slab);
return NULL;
}
void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)
{
struct amdgpu_slab *slab = amdgpu_slab(pslab);
- for (unsigned i = 0; i < slab->base.num_entries; ++i)
+ for (unsigned i = 0; i < slab->base.num_entries; ++i) {
amdgpu_bo_remove_fences(&slab->entries[i]);
+ simple_mtx_destroy(&slab->entries[i].lock);
+ }
FREE(slab->entries);
amdgpu_winsys_bo_reference(&slab->buffer, NULL);
FREE(slab);
}
#if DEBUG_SPARSE_COMMITS
static void
sparse_dump(struct amdgpu_winsys_bo *bo, const char *func)
{
@@ -851,22 +856,22 @@ static void amdgpu_bo_sparse_destroy(struct pb_buffer *_buf)
}
while (!list_empty(&bo->u.sparse.backing)) {
struct amdgpu_sparse_backing *dummy = NULL;
sparse_free_backing_buffer(bo,
container_of(bo->u.sparse.backing.next,
dummy, list));
}
amdgpu_va_range_free(bo->u.sparse.va_handle);
- simple_mtx_destroy(&bo->u.sparse.commit_lock);
FREE(bo->u.sparse.commitments);
+ simple_mtx_destroy(&bo->lock);
FREE(bo);
}
static const struct pb_vtbl amdgpu_winsys_bo_sparse_vtbl = {
amdgpu_bo_sparse_destroy
/* other functions are never called */
};
static struct pb_buffer *
amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,
@@ -882,37 +887,37 @@ amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,
* that exceed this limit. This is not really a restriction: we don't have
* that much virtual address space anyway.
*/
if (size > (uint64_t)INT32_MAX * RADEON_SPARSE_PAGE_SIZE)
return NULL;
bo = CALLOC_STRUCT(amdgpu_winsys_bo);
if (!bo)
return NULL;
+ simple_mtx_init(&bo->lock, mtx_plain);
pipe_reference_init(&bo->base.reference, 1);
bo->base.alignment = RADEON_SPARSE_PAGE_SIZE;
bo->base.size = size;
bo->base.vtbl = &amdgpu_winsys_bo_sparse_vtbl;
bo->ws = ws;
bo->initial_domain = domain;
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
bo->sparse = true;
bo->u.sparse.flags = flags & ~RADEON_FLAG_SPARSE;
bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);
bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages,
sizeof(*bo->u.sparse.commitments));
if (!bo->u.sparse.commitments)
goto error_alloc_commitments;
- simple_mtx_init(&bo->u.sparse.commit_lock, mtx_plain);
LIST_INITHEAD(&bo->u.sparse.backing);
/* For simplicity, we always map a multiple of the page size. */
map_size = align64(size, RADEON_SPARSE_PAGE_SIZE);
va_gap_size = ws->check_vm ? 4 * RADEON_SPARSE_PAGE_SIZE : 0;
r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
map_size + va_gap_size, RADEON_SPARSE_PAGE_SIZE,
0, &bo->va, &bo->u.sparse.va_handle,
AMDGPU_VA_RANGE_HIGH);
if (r)
@@ -921,23 +926,23 @@ amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,
r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0, size, bo->va,
AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
if (r)
goto error_va_map;
return &bo->base;
error_va_map:
amdgpu_va_range_free(bo->u.sparse.va_handle);
error_va_alloc:
- simple_mtx_destroy(&bo->u.sparse.commit_lock);
FREE(bo->u.sparse.commitments);
error_alloc_commitments:
+ simple_mtx_destroy(&bo->lock);
FREE(bo);
return NULL;
}
static bool
amdgpu_bo_sparse_commit(struct pb_buffer *buf, uint64_t offset, uint64_t size,
bool commit)
{
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buf);
struct amdgpu_sparse_commitment *comm;
@@ -948,21 +953,21 @@ amdgpu_bo_sparse_commit(struct pb_buffer *buf, uint64_t offset, uint64_t size,
assert(bo->sparse);
assert(offset % RADEON_SPARSE_PAGE_SIZE == 0);
assert(offset <= bo->base.size);
assert(size <= bo->base.size - offset);
assert(size % RADEON_SPARSE_PAGE_SIZE == 0 || offset + size == bo->base.size);
comm = bo->u.sparse.commitments;
va_page = offset / RADEON_SPARSE_PAGE_SIZE;
end_va_page = va_page + DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);
- simple_mtx_lock(&bo->u.sparse.commit_lock);
+ simple_mtx_lock(&bo->lock);
#if DEBUG_SPARSE_COMMITS
sparse_dump(bo, __func__);
#endif
if (commit) {
while (va_page < end_va_page) {
uint32_t span_va_page;
/* Skip pages that are already committed. */
@@ -1052,21 +1057,21 @@ amdgpu_bo_sparse_commit(struct pb_buffer *buf, uint64_t offset, uint64_t size,
if (!sparse_backing_free(bo, backing, backing_start, span_pages)) {
/* Couldn't allocate tracking data structures, so we have to leak */
fprintf(stderr, "amdgpu: leaking PRT backing memory\n");
ok = false;
}
}
}
out:
- simple_mtx_unlock(&bo->u.sparse.commit_lock);
+ simple_mtx_unlock(&bo->lock);
return ok;
}
static unsigned eg_tile_split(unsigned tile_split)
{
switch (tile_split) {
case 0: tile_split = 64; break;
case 1: tile_split = 128; break;
case 2: tile_split = 256; break;
@@ -1331,20 +1336,21 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
if (r)
goto error;
if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
initial |= RADEON_DOMAIN_VRAM;
if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
initial |= RADEON_DOMAIN_GTT;
/* Initialize the structure. */
+ simple_mtx_init(&bo->lock, mtx_plain);
pipe_reference_init(&bo->base.reference, 1);
bo->base.alignment = info.phys_alignment;
bo->bo = result.buf_handle;
bo->base.size = result.alloc_size;
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
bo->ws = ws;
bo->va = va;
bo->u.real.va_handle = va_handle;
bo->initial_domain = initial;
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
@@ -1441,20 +1447,21 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
aligned_size, 1 << 12, 0, &va, &va_handle,
AMDGPU_VA_RANGE_HIGH))
goto error_va_alloc;
if (amdgpu_bo_va_op(buf_handle, 0, aligned_size, va, 0, AMDGPU_VA_OP_MAP))
goto error_va_map;
/* Initialize it. */
pipe_reference_init(&bo->base.reference, 1);
+ simple_mtx_init(&bo->lock, mtx_plain);
bo->bo = buf_handle;
bo->base.alignment = 0;
bo->base.size = size;
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
bo->ws = ws;
bo->user_ptr = pointer;
bo->va = va;
bo->u.real.va_handle = va_handle;
bo->initial_domain = RADEON_DOMAIN_GTT;
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 1e07e4734aa..58e6eed733d 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -67,39 +67,39 @@ struct amdgpu_winsys_bo {
struct list_head global_list_item;
uint32_t kms_handle;
} real;
struct {
struct pb_slab_entry entry;
struct amdgpu_winsys_bo *real;
} slab;
struct {
- simple_mtx_t commit_lock;
amdgpu_va_handle va_handle;
enum radeon_bo_flag flags;
uint32_t num_va_pages;
uint32_t num_backing_pages;
struct list_head backing;
/* Commitment information for each page of the virtual memory area. */
struct amdgpu_sparse_commitment *commitments;
} sparse;
} u;
struct amdgpu_winsys *ws;
void *user_ptr; /* from buffer_from_ptr */
amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */
bool sparse;
+ bool is_local;
uint32_t unique_id;
uint64_t va;
enum radeon_bo_domain initial_domain;
/* how many command streams is this bo referenced in? */
int num_cs_references;
/* how many command streams, which are being emitted in a separate
* thread, is this bo referenced in? */
volatile int num_active_ioctls;
@@ -107,21 +107,21 @@ struct amdgpu_winsys_bo {
/* whether buffer_get_handle or buffer_from_handle was called,
* it can only transition from false to true
*/
volatile int is_shared; /* bool (int for atomicity) */
/* Fences for buffer synchronization. */
unsigned num_fences;
unsigned max_fences;
struct pipe_fence_handle **fences;
- bool is_local;
+ simple_mtx_t lock;
};
struct amdgpu_slab {
struct pb_slab base;
struct amdgpu_winsys_bo *buffer;
struct amdgpu_winsys_bo *entries;
};
bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf);
void amdgpu_bo_destroy(struct pb_buffer *_buf);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 5ec3b470a15..9e4de7779e2 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -591,30 +591,30 @@ static int amdgpu_lookup_or_add_sparse_buffer(struct amdgpu_cs *acs,
amdgpu_winsys_bo_reference(&buffer->bo, bo);
p_atomic_inc(&bo->num_cs_references);
cs->num_sparse_buffers++;
hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist)-1);
cs->buffer_indices_hashlist[hash] = idx;
/* We delay adding the backing buffers until we really have to. However,
* we cannot delay accounting for memory use.
*/
- simple_mtx_lock(&bo->u.sparse.commit_lock);
+ simple_mtx_lock(&bo->lock);
list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
acs->main.base.used_vram += backing->bo->base.size;
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
acs->main.base.used_gart += backing->bo->base.size;
}
- simple_mtx_unlock(&bo->u.sparse.commit_lock);
+ simple_mtx_unlock(&bo->lock);
return idx;
}
static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
struct pb_buffer *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
enum radeon_bo_priority priority)
{
@@ -1260,39 +1260,39 @@ static void amdgpu_cs_add_syncobj_signal(struct radeon_cmdbuf *rws,
*
* This is done late, during submission, to keep the buffer list short before
* submit, and to avoid managing fences for the backing buffers.
*/
static bool amdgpu_add_sparse_backing_buffers(struct amdgpu_cs_context *cs)
{
for (unsigned i = 0; i < cs->num_sparse_buffers; ++i) {
struct amdgpu_cs_buffer *buffer = &cs->sparse_buffers[i];
struct amdgpu_winsys_bo *bo = buffer->bo;
- simple_mtx_lock(&bo->u.sparse.commit_lock);
+ simple_mtx_lock(&bo->lock);
list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
/* We can directly add the buffer here, because we know that each
* backing buffer occurs only once.
*/
int idx = amdgpu_do_add_real_buffer(cs, backing->bo);
if (idx < 0) {
fprintf(stderr, "%s: failed to add buffer\n", __FUNCTION__);
- simple_mtx_unlock(&bo->u.sparse.commit_lock);
+ simple_mtx_unlock(&bo->lock);
return false;
}
cs->real_buffers[idx].usage = buffer->usage & ~RADEON_USAGE_SYNCHRONIZED;
cs->real_buffers[idx].u.real.priority_usage = buffer->u.real.priority_usage;
p_atomic_inc(&backing->bo->num_active_ioctls);
}
- simple_mtx_unlock(&bo->u.sparse.commit_lock);
+ simple_mtx_unlock(&bo->lock);
}
return true;
}
void amdgpu_cs_submit_ib(void *job, int thread_index)
{
struct amdgpu_cs *acs = (struct amdgpu_cs*)job;
struct amdgpu_winsys *ws = acs->ctx->ws;
struct amdgpu_cs_context *cs = acs->cst;
--
2.19.1