[Mesa-dev] [PATCH] winsys/amdgpu: fix a race condition between fence updates and IB submissions
Nicolai Hähnle
nhaehnle at gmail.com
Fri Jan 6 12:02:38 UTC 2017
On 02.01.2017 21:20, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> The CS thread is needed to ensure proper ordering of operations and can't
> be disabled (without complicating the code).
>
> Discovered by Nine CSMT, which ended up in a deadlock.
I'm curious why the thread makes a difference for the deadlock. Why
isn't it enough in the un-threaded case to extend the scope of the
ws->bo_fence_lock to cover the submit ioctl call?
Then again, I'm happy with simplifying the code to eliminate the
un-threaded path, so...
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> ---
> src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 31 +++++++++++++++------------
> src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 9 ++++----
> 2 files changed, 22 insertions(+), 18 deletions(-)
>
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> index 95402bf..87246f7 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> @@ -1060,25 +1060,23 @@ cleanup:
> for (i = 0; i < cs->num_slab_buffers; i++)
> p_atomic_dec(&cs->slab_buffers[i].bo->num_active_ioctls);
>
> amdgpu_cs_context_cleanup(cs);
> }
>
> /* Make sure the previous submission is completed. */
> void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs)
> {
> struct amdgpu_cs *cs = amdgpu_cs(rcs);
> - struct amdgpu_winsys *ws = cs->ctx->ws;
>
> /* Wait for any pending ioctl of this CS to complete. */
> - if (util_queue_is_initialized(&ws->cs_queue))
> - util_queue_job_wait(&cs->flush_completed);
> + util_queue_job_wait(&cs->flush_completed);
> }
>
> static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
> unsigned flags,
> struct pipe_fence_handle **fence)
> {
> struct amdgpu_cs *cs = amdgpu_cs(rcs);
> struct amdgpu_winsys *ws = cs->ctx->ws;
> int error_code = 0;
>
> @@ -1150,53 +1148,58 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
> cs->next_fence = NULL;
> } else {
> cur->fence = amdgpu_fence_create(cs->ctx,
> cur->request.ip_type,
> cur->request.ip_instance,
> cur->request.ring);
> }
> if (fence)
> amdgpu_fence_reference(fence, cur->fence);
>
> - /* Prepare buffers. */
> + amdgpu_cs_sync_flush(rcs);
> +
> + /* Prepare buffers.
> + *
> + * This lock must be held until the submission is queued to ensure
> + * that the order of fence dependency updates matches the order of
> + * submissions.
> + */
> pipe_mutex_lock(ws->bo_fence_lock);
> amdgpu_add_fence_dependencies(cs);
>
> num_buffers = cur->num_real_buffers;
> for (i = 0; i < num_buffers; i++) {
> struct amdgpu_winsys_bo *bo = cur->real_buffers[i].bo;
> p_atomic_inc(&bo->num_active_ioctls);
> amdgpu_add_fence(bo, cur->fence);
> }
>
> num_buffers = cur->num_slab_buffers;
> for (i = 0; i < num_buffers; i++) {
> struct amdgpu_winsys_bo *bo = cur->slab_buffers[i].bo;
> p_atomic_inc(&bo->num_active_ioctls);
> amdgpu_add_fence(bo, cur->fence);
> }
> - pipe_mutex_unlock(ws->bo_fence_lock);
> -
> - amdgpu_cs_sync_flush(rcs);
>
> /* Swap command streams. "cst" is going to be submitted. */
> cs->csc = cs->cst;
> cs->cst = cur;
>
> /* Submit. */
> - if ((flags & RADEON_FLUSH_ASYNC) &&
> - util_queue_is_initialized(&ws->cs_queue)) {
> - util_queue_add_job(&ws->cs_queue, cs, &cs->flush_completed,
> - amdgpu_cs_submit_ib, NULL);
> - } else {
> - amdgpu_cs_submit_ib(cs, 0);
> - error_code = cs->cst->error_code;
> + util_queue_add_job(&ws->cs_queue, cs, &cs->flush_completed,
> + amdgpu_cs_submit_ib, NULL);
> + /* The submission has been queued, unlock the mutex now. */
> + pipe_mutex_unlock(ws->bo_fence_lock);
> +
> + if (!(flags & RADEON_FLUSH_ASYNC)) {
> + amdgpu_cs_sync_flush(rcs);
> + error_code = cur->error_code;
> }
> } else {
> amdgpu_cs_context_cleanup(cs->csc);
> }
>
> amdgpu_get_new_ib(&ws->base, cs, IB_MAIN);
> if (cs->const_ib.ib_mapped)
> amdgpu_get_new_ib(&ws->base, cs, IB_CONST);
> if (cs->const_preamble_ib.ib_mapped)
> amdgpu_get_new_ib(&ws->base, cs, IB_CONST_PREAMBLE);
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> index b950d37..e944e62 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> @@ -471,22 +471,20 @@ static unsigned hash_dev(void *key)
> #else
> return pointer_to_intptr(key);
> #endif
> }
>
> static int compare_dev(void *key1, void *key2)
> {
> return key1 != key2;
> }
>
> -DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", true)
> -
> static bool amdgpu_winsys_unref(struct radeon_winsys *rws)
> {
> struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
> bool destroy;
>
> /* When the reference counter drops to zero, remove the device pointer
> * from the table.
> * This must happen while the mutex is locked, so that
> * amdgpu_winsys_create in another thread doesn't get the winsys
> * from the table when the counter drops to 0. */
> @@ -577,22 +575,25 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
> ws->base.read_registers = amdgpu_read_registers;
>
> amdgpu_bo_init_functions(ws);
> amdgpu_cs_init_functions(ws);
> amdgpu_surface_init_functions(ws);
>
> LIST_INITHEAD(&ws->global_bo_list);
> pipe_mutex_init(ws->global_bo_list_lock);
> pipe_mutex_init(ws->bo_fence_lock);
>
> - if (sysconf(_SC_NPROCESSORS_ONLN) > 1 && debug_get_option_thread())
> - util_queue_init(&ws->cs_queue, "amdgpu_cs", 8, 1);
> + if (!util_queue_init(&ws->cs_queue, "amdgpu_cs", 8, 1)) {
> + amdgpu_winsys_destroy(&ws->base);
> + pipe_mutex_unlock(dev_tab_mutex);
> + return NULL;
> + }
>
> /* Create the screen at the end. The winsys must be initialized
> * completely.
> *
> * Alternatively, we could create the screen based on "ws->gen"
> * and link all drivers into one binary blob. */
> ws->base.screen = screen_create(&ws->base);
> if (!ws->base.screen) {
> amdgpu_winsys_destroy(&ws->base);
> pipe_mutex_unlock(dev_tab_mutex);
>
More information about the mesa-dev
mailing list