[Mesa-dev] [PATCH 2/2] winsys/radeon: use the common job queue for multithreaded command submission v2

Nicolai Hähnle nhaehnle at gmail.com
Wed Jun 15 07:08:52 UTC 2016


On 14.06.2016 19:06, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> v2: fixup after renaming to util_queue_fence
> ---
>   src/gallium/winsys/radeon/drm/radeon_drm_cs.c     | 22 ++++----
>   src/gallium/winsys/radeon/drm/radeon_drm_cs.h     |  4 +-
>   src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 63 ++---------------------
>   src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 12 ++---
>   4 files changed, 19 insertions(+), 82 deletions(-)
>
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> index e9ab53d..9552bd5 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
> @@ -177,7 +177,7 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
>       if (!cs) {
>           return NULL;
>       }
> -    pipe_semaphore_init(&cs->flush_completed, 1);
> +    util_queue_fence_init(&cs->flush_completed);
>
>       cs->ws = ws;
>       cs->flush_cs = flush;
> @@ -427,8 +427,9 @@ static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
>       return cs->csc->crelocs;
>   }
>
> -void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
> +void radeon_drm_cs_emit_ioctl_oneshot(void *job)
>   {
> +    struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst;
>       unsigned i;
>       int r;
>
> @@ -463,11 +464,9 @@ void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
>   {
>       struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
>
> -    /* Wait for any pending ioctl to complete. */
> -    if (cs->ws->thread) {
> -        pipe_semaphore_wait(&cs->flush_completed);
> -        pipe_semaphore_signal(&cs->flush_completed);
> -    }
> +    /* Wait for any pending ioctl of this CS to complete. */
> +    if (util_queue_is_initialized(&cs->ws->cs_queue))
> +        util_queue_job_wait(&cs->flush_completed);
>   }
>
>   DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
> @@ -586,13 +585,12 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
>               break;
>           }
>
> -        if (cs->ws->thread) {
> -            pipe_semaphore_wait(&cs->flush_completed);
> -            radeon_drm_ws_queue_cs(cs->ws, cs);
> +        if (util_queue_is_initialized(&cs->ws->cs_queue)) {
> +            util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed);
>               if (!(flags & RADEON_FLUSH_ASYNC))
>                   radeon_drm_cs_sync_flush(rcs);

Arguably, it would make sense to use the thread only when
RADEON_FLUSH_ASYNC is set, as the amdgpu winsys does. But maybe that is
for a different patch. Either way, the series is

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

>           } else {
> -            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
> +            radeon_drm_cs_emit_ioctl_oneshot(cs);
>           }
>       } else {
>           radeon_cs_context_cleanup(cs->cst);
> @@ -610,7 +608,7 @@ static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
>       struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
>
>       radeon_drm_cs_sync_flush(rcs);
> -    pipe_semaphore_destroy(&cs->flush_completed);
> +    util_queue_fence_destroy(&cs->flush_completed);
>       radeon_cs_context_cleanup(&cs->csc1);
>       radeon_cs_context_cleanup(&cs->csc2);
>       p_atomic_dec(&cs->ws->num_cs);
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> index 8056e72..a5f243d 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
> @@ -78,7 +78,7 @@ struct radeon_drm_cs {
>       void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
>       void *flush_data;
>
> -    pipe_semaphore flush_completed;
> +    struct util_queue_fence flush_completed;
>   };
>
>   int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo);
> @@ -122,6 +122,6 @@ radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
>
>   void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs);
>   void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws);
> -void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc);
> +void radeon_drm_cs_emit_ioctl_oneshot(void *job);
>
>   #endif
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> index 5c85c8f..1f296f4 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> @@ -534,16 +534,11 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
>   {
>       struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
>
> -    if (ws->thread) {
> -        ws->kill_thread = 1;
> -        pipe_semaphore_signal(&ws->cs_queued);
> -        pipe_thread_wait(ws->thread);
> -    }
> -    pipe_semaphore_destroy(&ws->cs_queued);
> +    if (util_queue_is_initialized(&ws->cs_queue))
> +        util_queue_destroy(&ws->cs_queue);
>
>       pipe_mutex_destroy(ws->hyperz_owner_mutex);
>       pipe_mutex_destroy(ws->cmask_owner_mutex);
> -    pipe_mutex_destroy(ws->cs_stack_lock);
>
>       pb_cache_deinit(&ws->bo_cache);
>
> @@ -686,55 +681,7 @@ static int compare_fd(void *key1, void *key2)
>              stat1.st_rdev != stat2.st_rdev;
>   }
>
> -void radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, struct radeon_drm_cs *cs)
> -{
> -retry:
> -    pipe_mutex_lock(ws->cs_stack_lock);
> -    if (ws->ncs >= RING_LAST) {
> -        /* no room left for a flush */
> -        pipe_mutex_unlock(ws->cs_stack_lock);
> -        goto retry;
> -    }
> -    ws->cs_stack[ws->ncs++] = cs;
> -    pipe_mutex_unlock(ws->cs_stack_lock);
> -    pipe_semaphore_signal(&ws->cs_queued);
> -}
> -
> -static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param)
> -{
> -    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)param;
> -    struct radeon_drm_cs *cs;
> -    unsigned i;
> -
> -    while (1) {
> -        pipe_semaphore_wait(&ws->cs_queued);
> -        if (ws->kill_thread)
> -            break;
> -
> -        pipe_mutex_lock(ws->cs_stack_lock);
> -        cs = ws->cs_stack[0];
> -        for (i = 1; i < ws->ncs; i++)
> -            ws->cs_stack[i - 1] = ws->cs_stack[i];
> -        ws->cs_stack[--ws->ncs] = NULL;
> -        pipe_mutex_unlock(ws->cs_stack_lock);
> -
> -        if (cs) {
> -            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
> -            pipe_semaphore_signal(&cs->flush_completed);
> -        }
> -    }
> -    pipe_mutex_lock(ws->cs_stack_lock);
> -    for (i = 0; i < ws->ncs; i++) {
> -        pipe_semaphore_signal(&ws->cs_stack[i]->flush_completed);
> -        ws->cs_stack[i] = NULL;
> -    }
> -    ws->ncs = 0;
> -    pipe_mutex_unlock(ws->cs_stack_lock);
> -    return 0;
> -}
> -
>   DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
> -static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param);
>
>   static bool radeon_winsys_unref(struct radeon_winsys *ws)
>   {
> @@ -823,7 +770,6 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
>
>       pipe_mutex_init(ws->hyperz_owner_mutex);
>       pipe_mutex_init(ws->cmask_owner_mutex);
> -    pipe_mutex_init(ws->cs_stack_lock);
>
>       ws->bo_names = util_hash_table_create(handle_hash, handle_compare);
>       ws->bo_handles = util_hash_table_create(handle_hash, handle_compare);
> @@ -836,10 +782,9 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
>       /* TTM aligns the BO size to the CPU page size */
>       ws->info.gart_page_size = sysconf(_SC_PAGESIZE);
>
> -    ws->ncs = 0;
> -    pipe_semaphore_init(&ws->cs_queued, 0);
>       if (ws->num_cpus > 1 && debug_get_option_thread())
> -        ws->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, ws);
> +        util_queue_init(&ws->cs_queue,
> +                        radeon_drm_cs_emit_ioctl_oneshot);
>
>       /* Create the screen at the end. The winsys must be initialized
>        * completely.
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
> index 80de561..c429aba 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
> @@ -32,7 +32,7 @@
>
>   #include "gallium/drivers/radeon/radeon_winsys.h"
>   #include "pipebuffer/pb_cache.h"
> -#include "os/os_thread.h"
> +#include "util/u_queue.h"
>   #include "util/list.h"
>   #include <radeon_drm.h>
>
> @@ -101,13 +101,8 @@ struct radeon_drm_winsys {
>       struct radeon_drm_cs *cmask_owner;
>       pipe_mutex cmask_owner_mutex;
>
> -    /* rings submission thread */
> -    pipe_mutex cs_stack_lock;
> -    pipe_semaphore cs_queued;
> -    pipe_thread thread;
> -    int kill_thread;
> -    int ncs;
> -    struct radeon_drm_cs *cs_stack[RING_LAST];
> +    /* multithreaded command submission */
> +    struct util_queue cs_queue;
>   };
>
>   static inline struct radeon_drm_winsys *
> @@ -116,7 +111,6 @@ radeon_drm_winsys(struct radeon_winsys *base)
>       return (struct radeon_drm_winsys*)base;
>   }
>
> -void radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, struct radeon_drm_cs *cs);
>   void radeon_surface_init_functions(struct radeon_drm_winsys *ws);
>
>   #endif
>


More information about the mesa-dev mailing list