[Mesa-dev] [PATCH 04/11] util/u_queue: add an option to set the minimum thread priority
Marek Olšák
maraeo at gmail.com
Thu Jun 1 18:18:22 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
---
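Not part of the diff below, just a sketch for reviewers: a call site opts
into the new behavior by passing the flag added in u_queue.h as the new
last argument of util_queue_init(). The queue name and sizes here are
made up for illustration:

   struct util_queue queue;

   /* 8 queued jobs, 1 worker thread; the worker demotes itself to the
    * minimum scheduling priority on Linux (see u_queue.c below).
    */
   if (!util_queue_init(&queue, "example", 8, 1,
                        UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY))
      return false;

All existing callers pass 0 to keep the current default priority.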
 src/gallium/auxiliary/util/u_threaded_context.c   |  2 +-
 src/gallium/drivers/freedreno/freedreno_batch.c   |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.c            |  2 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c     |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  2 +-
 src/util/disk_cache.c                             |  2 +-
 src/util/u_queue.c                                | 19 ++++++++++++++++++-
 src/util/u_queue.h                                |  6 +++++-
 8 files changed, 29 insertions(+), 8 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 71211e6..554cc88 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -2194,21 +2194,21 @@ threaded_context_create(struct pipe_context *pipe,
else
tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
if (!tc->base.stream_uploader || !tc->base.const_uploader)
goto fail;
/* The queue size is the number of batches "waiting". Batches are removed
* from the queue before being executed, so keep one tc_batch slot for that
* execution. Also, keep one unused slot for an unflushed batch.
*/
- if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1))
+ if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1, 0))
goto fail;
for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
tc->batch_slots[i].sentinel = TC_SENTINEL;
tc->batch_slots[i].pipe = pipe;
util_queue_fence_init(&tc->batch_slots[i].fence);
}
LIST_INITHEAD(&tc->unflushed_queries);
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index 5783ee8..33b6240 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -265,21 +265,21 @@ batch_flush(struct fd_batch *batch)
fd_batch_set_stage(batch, FD_STAGE_NULL);
fd_context_all_dirty(batch->ctx);
batch_flush_reset_dependencies(batch, true);
if (batch->ctx->screen->reorder) {
struct fd_batch *tmp = NULL;
fd_batch_reference(&tmp, batch);
if (!util_queue_is_initialized(&batch->ctx->flush_queue))
- util_queue_init(&batch->ctx->flush_queue, "flush_queue", 16, 1);
+ util_queue_init(&batch->ctx->flush_queue, "flush_queue", 16, 1, 0);
util_queue_add_job(&batch->ctx->flush_queue,
batch, &batch->flush_fence,
batch_flush_func, batch_cleanup_func);
} else {
fd_gmem_render_tiles(batch);
batch_reset_resources(batch);
}
debug_assert(batch->reference.count > 0);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 4704304..8bf6fd9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -882,21 +882,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
}
/* Only enable as many threads as we have target machines, but at most
* the number of CPUs - 1 if there is more than one.
*/
num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
num_cpus = MAX2(1, num_cpus - 1);
num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm));
if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
- 32, num_compiler_threads)) {
+ 32, num_compiler_threads, 0)) {
si_destroy_shader_cache(sscreen);
FREE(sscreen);
return NULL;
}
si_handle_env_var_force_family(sscreen);
if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
si_init_perfcounters(sscreen);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index c8bd60e..43f2ed2 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -298,21 +298,21 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
ws->base.read_registers = amdgpu_read_registers;
amdgpu_bo_init_functions(ws);
amdgpu_cs_init_functions(ws);
amdgpu_surface_init_functions(ws);
LIST_INITHEAD(&ws->global_bo_list);
(void) mtx_init(&ws->global_bo_list_lock, mtx_plain);
(void) mtx_init(&ws->bo_fence_lock, mtx_plain);
- if (!util_queue_init(&ws->cs_queue, "amdgpu_cs", 8, 1)) {
+ if (!util_queue_init(&ws->cs_queue, "amdgpu_cs", 8, 1, 0)) {
amdgpu_winsys_destroy(&ws->base);
mtx_unlock(&dev_tab_mutex);
return NULL;
}
/* Create the screen at the end. The winsys must be initialized
* completely.
*
* Alternatively, we could create the screen based on "ws->gen"
* and link all drivers into one binary blob. */
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index a485615..9bbffa5 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -814,21 +814,21 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
(void) mtx_init(&ws->bo_handles_mutex, mtx_plain);
(void) mtx_init(&ws->bo_va_mutex, mtx_plain);
(void) mtx_init(&ws->bo_fence_lock, mtx_plain);
ws->va_offset = ws->va_start;
list_inithead(&ws->va_holes);
/* TTM aligns the BO size to the CPU page size */
ws->info.gart_page_size = sysconf(_SC_PAGESIZE);
if (ws->num_cpus > 1 && debug_get_option_thread())
- util_queue_init(&ws->cs_queue, "radeon_cs", 8, 1);
+ util_queue_init(&ws->cs_queue, "radeon_cs", 8, 1, 0);
/* Create the screen at the end. The winsys must be initialized
* completely.
*
* Alternatively, we could create the screen based on "ws->gen"
* and link all drivers into one binary blob. */
ws->base.screen = screen_create(&ws->base);
if (!ws->base.screen) {
radeon_winsys_destroy(&ws->base);
mtx_unlock(&fd_tab_mutex);
diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 138d7ec..b222987 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -335,21 +335,21 @@ disk_cache_create(const char *gpu_name, const char *timestamp,
}
cache->max_size = max_size;
/* A limit of 32 jobs was chosen as observations of Deus Ex start-up times
 * showed that we reached at most 11 jobs on an Intel i5-6400 CPU at 2.70GHz
 * (a fairly modest desktop CPU). 1 thread was chosen because we don't
 * really care about getting things to disk quickly, just that it's not
 * blocking other tasks.
 */
- util_queue_init(&cache->cache_queue, "disk_cache", 32, 1);
+ util_queue_init(&cache->cache_queue, "disk_cache", 32, 1, 0);
/* Create driver id keys */
size_t ts_size = strlen(timestamp) + 1;
size_t gpu_name_size = strlen(gpu_name) + 1;
cache->driver_keys_blob_size = ts_size;
cache->driver_keys_blob_size += gpu_name_size;
/* We sometimes store entire structs that contain pointers in the cache,
 * so use the pointer size as a key to avoid hard-to-debug issues.
*/
diff --git a/src/util/u_queue.c b/src/util/u_queue.c
index 99de34c..32edb5e 100644
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -140,20 +140,35 @@ util_queue_thread_func(void *input)
int thread_index = ((struct thread_input*)input)->thread_index;
free(input);
if (queue->name) {
char name[16];
util_snprintf(name, sizeof(name), "%s:%i", queue->name, thread_index);
u_thread_setname(name);
}
+ if (queue->flags & UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY) {
+#if defined(__linux__)
+ struct sched_param sched_param = {0};
+
+ /* The nice() function can only set a maximum of 19.
+ * SCHED_IDLE is the same as nice = 20.
+ *
+ * Note that Linux only allows decreasing the priority. The original
+ * priority can't be restored.
+ */
+ pthread_setschedparam(queue->threads[thread_index], SCHED_IDLE,
+ &sched_param);
+#endif
+ }
+
while (1) {
struct util_queue_job job;
mtx_lock(&queue->lock);
assert(queue->num_queued >= 0 && queue->num_queued <= queue->max_jobs);
/* wait if the queue is empty */
while (!queue->kill_threads && queue->num_queued == 0)
cnd_wait(&queue->has_queued_cond, &queue->lock);
@@ -190,27 +205,29 @@ util_queue_thread_func(void *input)
queue->read_idx = (queue->read_idx + queue->num_queued) % queue->max_jobs;
queue->num_queued = 0;
mtx_unlock(&queue->lock);
return 0;
}
bool
util_queue_init(struct util_queue *queue,
const char *name,
unsigned max_jobs,
- unsigned num_threads)
+ unsigned num_threads,
+ unsigned flags)
{
unsigned i;
memset(queue, 0, sizeof(*queue));
queue->name = name;
queue->num_threads = num_threads;
+ queue->flags = flags;
queue->max_jobs = max_jobs;
queue->jobs = (struct util_queue_job*)
calloc(max_jobs, sizeof(struct util_queue_job));
if (!queue->jobs)
goto fail;
(void) mtx_init(&queue->lock, mtx_plain);
queue->num_queued = 0;
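(Not part of the patch.) The demotion above, and the fact that it can't
easily be undone, can be reproduced with a small standalone program; this
sketch assumes Linux with glibc and only illustrates the pthread calls
used by the worker thread:

   #define _GNU_SOURCE /* glibc only exposes SCHED_IDLE with this */
   #include <pthread.h>
   #include <sched.h>
   #include <stdio.h>

   int main(void)
   {
      /* sched_priority must be 0 for both SCHED_IDLE and SCHED_OTHER. */
      struct sched_param sched_param = {0};
      int policy;

      /* Demote the calling thread, like the queue worker does above. */
      if (pthread_setschedparam(pthread_self(), SCHED_IDLE, &sched_param) != 0)
         return 1;

      /* Read the policy back to confirm the demotion took effect. */
      pthread_getschedparam(pthread_self(), &policy, &sched_param);
      printf("SCHED_IDLE set: %s\n", policy == SCHED_IDLE ? "yes" : "no");

      /* With the default RLIMIT_NICE an unprivileged thread cannot switch
       * back to SCHED_OTHER, which is what the comment above means by the
       * original priority not being restorable.
       */
      printf("restore allowed: %s\n",
             pthread_setschedparam(pthread_self(), SCHED_OTHER,
                                   &sched_param) == 0 ? "yes" : "no");
      return 0;
   }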
diff --git a/src/util/u_queue.h b/src/util/u_queue.h
index 9876865..916802c 100644
--- a/src/util/u_queue.h
+++ b/src/util/u_queue.h
@@ -35,20 +35,22 @@
#include <string.h>
#include "util/list.h"
#include "util/u_thread.h"
#ifdef __cplusplus
extern "C" {
#endif
+#define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY (1 << 0)
+
/* Job completion fence.
* Put this into your job structure.
*/
struct util_queue_fence {
mtx_t mutex;
cnd_t cond;
int signalled;
};
typedef void (*util_queue_execute_func)(void *job, int thread_index);
@@ -62,33 +64,35 @@ struct util_queue_job {
/* Put this into your context. */
struct util_queue {
const char *name;
mtx_t lock;
cnd_t has_queued_cond;
cnd_t has_space_cond;
thrd_t *threads;
int num_queued;
unsigned num_threads;
+ unsigned flags;
int kill_threads;
int max_jobs;
int write_idx, read_idx; /* ring buffer pointers */
struct util_queue_job *jobs;
/* for cleanup at exit(), protected by exit_mutex */
struct list_head head;
};
bool util_queue_init(struct util_queue *queue,
const char *name,
unsigned max_jobs,
- unsigned num_threads);
+ unsigned num_threads,
+ unsigned flags);
void util_queue_destroy(struct util_queue *queue);
void util_queue_fence_init(struct util_queue_fence *fence);
void util_queue_fence_destroy(struct util_queue_fence *fence);
/* optional cleanup callback is called after fence is signaled: */
void util_queue_add_job(struct util_queue *queue,
void *job,
struct util_queue_fence *fence,
util_queue_execute_func execute,
util_queue_execute_func cleanup);
--
2.7.4