[Mesa-dev] [PATCH] winsys/amdgpu: fix a race condition between fence updates and IB submissions
Marek Olšák
maraeo at gmail.com
Mon Jan 2 20:20:34 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
The CS thread is needed to ensure that fence updates and IB submissions happen in the same order, so it can no longer be disabled (not without complicating the code).
Discovered by Nine CSMT, which ended up in a deadlock.
---
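For context (not part of the patch): a minimal, self-contained sketch of the invariant this change enforces, written with plain pthreads instead of Mesa's pipe_mutex/util_queue wrappers. The names (flush_cs, submit_queue, next_fence) are illustrative stand-ins only. The point is that recording fence updates and queueing the submission happen under one lock, so the queue order always matches the fence order; releasing the lock before queueing (as the old code did) lets another thread's flush slip in between.

  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t fence_lock = PTHREAD_MUTEX_INITIALIZER;
  static unsigned next_fence;       /* stands in for fence creation/updates */
  static unsigned submit_queue[64]; /* stands in for ws->cs_queue           */
  static unsigned num_queued;

  static void flush_cs(void)
  {
     pthread_mutex_lock(&fence_lock);
     unsigned fence = next_fence++;      /* fence dependency updates   */
     submit_queue[num_queued++] = fence; /* queue the submission       */
     pthread_mutex_unlock(&fence_lock);  /* unlock only after queueing */
  }

  static void *worker(void *arg)
  {
     for (int i = 0; i < 8; i++)
        flush_cs();
     return arg;
  }

  int main(void)
  {
     pthread_t a, b;
     pthread_create(&a, NULL, worker, NULL);
     pthread_create(&b, NULL, worker, NULL);
     pthread_join(a, NULL);
     pthread_join(b, NULL);
     /* With the lock held across both steps, this is always increasing. */
     for (unsigned i = 0; i < num_queued; i++)
        printf("%u ", submit_queue[i]);
     printf("\n");
     return 0;
  }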
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 31 +++++++++++++++------------
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 9 ++++----
2 files changed, 22 insertions(+), 18 deletions(-)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 95402bf..87246f7 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1060,25 +1060,23 @@ cleanup:
for (i = 0; i < cs->num_slab_buffers; i++)
p_atomic_dec(&cs->slab_buffers[i].bo->num_active_ioctls);
amdgpu_cs_context_cleanup(cs);
}
/* Make sure the previous submission is completed. */
void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
- struct amdgpu_winsys *ws = cs->ctx->ws;
/* Wait for any pending ioctl of this CS to complete. */
- if (util_queue_is_initialized(&ws->cs_queue))
- util_queue_job_wait(&cs->flush_completed);
+ util_queue_job_wait(&cs->flush_completed);
}
static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
unsigned flags,
struct pipe_fence_handle **fence)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys *ws = cs->ctx->ws;
int error_code = 0;
@@ -1150,53 +1148,58 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
cs->next_fence = NULL;
} else {
cur->fence = amdgpu_fence_create(cs->ctx,
cur->request.ip_type,
cur->request.ip_instance,
cur->request.ring);
}
if (fence)
amdgpu_fence_reference(fence, cur->fence);
- /* Prepare buffers. */
+ amdgpu_cs_sync_flush(rcs);
+
+ /* Prepare buffers.
+ *
+ * The fence lock must be held until the submission is queued to ensure
+ * that the order of fence dependency updates matches the order of
+ * submissions.
+ */
pipe_mutex_lock(ws->bo_fence_lock);
amdgpu_add_fence_dependencies(cs);
num_buffers = cur->num_real_buffers;
for (i = 0; i < num_buffers; i++) {
struct amdgpu_winsys_bo *bo = cur->real_buffers[i].bo;
p_atomic_inc(&bo->num_active_ioctls);
amdgpu_add_fence(bo, cur->fence);
}
num_buffers = cur->num_slab_buffers;
for (i = 0; i < num_buffers; i++) {
struct amdgpu_winsys_bo *bo = cur->slab_buffers[i].bo;
p_atomic_inc(&bo->num_active_ioctls);
amdgpu_add_fence(bo, cur->fence);
}
- pipe_mutex_unlock(ws->bo_fence_lock);
-
- amdgpu_cs_sync_flush(rcs);
/* Swap command streams. "cst" is going to be submitted. */
cs->csc = cs->cst;
cs->cst = cur;
/* Submit. */
- if ((flags & RADEON_FLUSH_ASYNC) &&
- util_queue_is_initialized(&ws->cs_queue)) {
- util_queue_add_job(&ws->cs_queue, cs, &cs->flush_completed,
- amdgpu_cs_submit_ib, NULL);
- } else {
- amdgpu_cs_submit_ib(cs, 0);
- error_code = cs->cst->error_code;
+ util_queue_add_job(&ws->cs_queue, cs, &cs->flush_completed,
+ amdgpu_cs_submit_ib, NULL);
+ /* The submission has been queued; drop the fence lock now. */
+ pipe_mutex_unlock(ws->bo_fence_lock);
+
+ if (!(flags & RADEON_FLUSH_ASYNC)) {
+ amdgpu_cs_sync_flush(rcs);
+ error_code = cur->error_code;
}
} else {
amdgpu_cs_context_cleanup(cs->csc);
}
amdgpu_get_new_ib(&ws->base, cs, IB_MAIN);
if (cs->const_ib.ib_mapped)
amdgpu_get_new_ib(&ws->base, cs, IB_CONST);
if (cs->const_preamble_ib.ib_mapped)
amdgpu_get_new_ib(&ws->base, cs, IB_CONST_PREAMBLE);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index b950d37..e944e62 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -471,22 +471,20 @@ static unsigned hash_dev(void *key)
#else
return pointer_to_intptr(key);
#endif
}
static int compare_dev(void *key1, void *key2)
{
return key1 != key2;
}
-DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", true)
-
static bool amdgpu_winsys_unref(struct radeon_winsys *rws)
{
struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
bool destroy;
/* When the reference counter drops to zero, remove the device pointer
* from the table.
* This must happen while the mutex is locked, so that
* amdgpu_winsys_create in another thread doesn't get the winsys
* from the table when the counter drops to 0. */
@@ -577,22 +575,25 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
ws->base.read_registers = amdgpu_read_registers;
amdgpu_bo_init_functions(ws);
amdgpu_cs_init_functions(ws);
amdgpu_surface_init_functions(ws);
LIST_INITHEAD(&ws->global_bo_list);
pipe_mutex_init(ws->global_bo_list_lock);
pipe_mutex_init(ws->bo_fence_lock);
- if (sysconf(_SC_NPROCESSORS_ONLN) > 1 && debug_get_option_thread())
- util_queue_init(&ws->cs_queue, "amdgpu_cs", 8, 1);
+ if (!util_queue_init(&ws->cs_queue, "amdgpu_cs", 8, 1)) {
+ amdgpu_winsys_destroy(&ws->base);
+ pipe_mutex_unlock(dev_tab_mutex);
+ return NULL;
+ }
/* Create the screen at the end. The winsys must be initialized
* completely.
*
* Alternatively, we could create the screen based on "ws->gen"
* and link all drivers into one binary blob. */
ws->base.screen = screen_create(&ws->base);
if (!ws->base.screen) {
amdgpu_winsys_destroy(&ws->base);
pipe_mutex_unlock(dev_tab_mutex);
--
2.7.4