[Mesa-dev] [PATCH 04/11] util/u_queue: add an option to set the minimum thread priority
Marek Olšák
maraeo at gmail.com
Thu Jun 1 18:18:22 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
---
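Not part of the diff below, just a sketch for reviewers: a call site opts
into the new behavior by passing the flag added in u_queue.h as the new
last argument of util_queue_init(). The queue name and sizes here are
made up for illustration:

   struct util_queue queue;

   /* 8 queued jobs, 1 worker thread; the worker demotes itself to the
    * minimum scheduling priority on Linux (see u_queue.c below).
    */
   if (!util_queue_init(&queue, "example", 8, 1,
                        UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY))
      return false;

All existing callers pass 0 to keep the current default priority.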
 src/gallium/auxiliary/util/u_threaded_context.c   |  2 +-
 src/gallium/drivers/freedreno/freedreno_batch.c   |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.c            |  2 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c     |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  2 +-
 src/util/disk_cache.c                             |  2 +-
 src/util/u_queue.c                                | 19 ++++++++++++++++++-
 src/util/u_queue.h                                |  6 +++++-
 8 files changed, 29 insertions(+), 8 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 71211e6..554cc88 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -2194,21 +2194,21 @@ threaded_context_create(struct pipe_context *pipe,
else
tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
if (!tc->base.stream_uploader || !tc->base.const_uploader)
goto fail;
/* The queue size is the number of batches "waiting". Batches are removed
* from the queue before being executed, so keep one tc_batch slot for that
* execution. Also, keep one unused slot for an unflushed batch.
*/
- if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1))
+ if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1, 0))
goto fail;
for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
tc->batch_slots[i].sentinel = TC_SENTINEL;
tc->batch_slots[i].pipe = pipe;
util_queue_fence_init(&tc->batch_slots[i].fence);
}
LIST_INITHEAD(&tc->unflushed_queries);
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index 5783ee8..33b6240 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -265,21 +265,21 @@ batch_flush(struct fd_batch *batch)
fd_batch_set_stage(batch, FD_STAGE_NULL);
fd_context_all_dirty(batch->ctx);
batch_flush_reset_dependencies(batch, true);
if (batch->ctx->screen->reorder) {
struct fd_batch *tmp = NULL;
fd_batch_reference(&tmp, batch);
if (!util_queue_is_initialized(&batch->ctx->flush_queue))
- util_queue_init(&batch->ctx->flush_queue, "flush_queue", 16, 1);
+ util_queue_init(&batch->ctx->flush_queue, "flush_queue", 16, 1, 0);
util_queue_add_job(&batch->ctx->flush_queue,
batch, &batch->flush_fence,
batch_flush_func, batch_cleanup_func);
} else {
fd_gmem_render_tiles(batch);
batch_reset_resources(batch);
}
debug_assert(batch->reference.count > 0);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 4704304..8bf6fd9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -882,21 +882,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
}
/* Only enable as many threads as we have target machines, but at most
* the number of CPUs - 1 if there is more than one.
*/
num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
num_cpus = MAX2(1, num_cpus - 1);
num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm));
if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
- 32, num_compiler_threads)) {
+ 32, num_compiler_threads, 0)) {
si_destroy_shader_cache(sscreen);
FREE(sscreen);
return NULL;
}
si_handle_env_var_force_family(sscreen);
if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
si_init_perfcounters(sscreen);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index c8bd60e..43f2ed2 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -298,21 +298,21 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
ws->base.read_registers = amdgpu_read_registers;
amdgpu_bo_init_functions(ws);
amdgpu_cs_init_functions(ws);
amdgpu_surface_init_functions(ws);
LIST_INITHEAD(&ws->global_bo_list);
(void) mtx_init(&ws->global_bo_list_lock, mtx_plain);
(void) mtx_init(&ws->bo_fence_lock, mtx_plain);
- if (!util_queue_init(&ws->cs_queue, "amdgpu_cs", 8, 1)) {
+ if (!util_queue_init(&ws->cs_queue, "amdgpu_cs", 8, 1, 0)) {
amdgpu_winsys_destroy(&ws->base);
mtx_unlock(&dev_tab_mutex);
return NULL;
}
/* Create the screen at the end. The winsys must be initialized
* completely.
*
* Alternatively, we could create the screen based on "ws->gen"
* and link all drivers into one binary blob. */
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index a485615..9bbffa5 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -814,21 +814,21 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
(void) mtx_init(&ws->bo_handles_mutex, mtx_plain);
(void) mtx_init(&ws->bo_va_mutex, mtx_plain);
(void) mtx_init(&ws->bo_fence_lock, mtx_plain);
ws->va_offset = ws->va_start;
list_inithead(&ws->va_holes);
/* TTM aligns the BO size to the CPU page size */
ws->info.gart_page_size = sysconf(_SC_PAGESIZE);
if (ws->num_cpus > 1 && debug_get_option_thread())
- util_queue_init(&ws->cs_queue, "radeon_cs", 8, 1);
+ util_queue_init(&ws->cs_queue, "radeon_cs", 8, 1, 0);
/* Create the screen at the end. The winsys must be initialized
* completely.
*
* Alternatively, we could create the screen based on "ws->gen"
* and link all drivers into one binary blob. */
ws->base.screen = screen_create(&ws->base);
if (!ws->base.screen) {
radeon_winsys_destroy(&ws->base);
mtx_unlock(&fd_tab_mutex);
diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 138d7ec..b222987 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -335,21 +335,21 @@ disk_cache_create(const char *gpu_name, const char *timestamp,
}
cache->max_size = max_size;
/* A limit of 32 jobs was chosen as observations of Deus Ex start-up times
 * showed that we reached at most 11 jobs on an Intel i5-6400 CPU at 2.70GHz
 * (a fairly modest desktop CPU). 1 thread was chosen because we don't
 * really care about getting things to disk quickly, just that it's not
 * blocking other tasks.
 */
- util_queue_init(&cache->cache_queue, "disk_cache", 32, 1);
+ util_queue_init(&cache->cache_queue, "disk_cache", 32, 1, 0);
/* Create driver id keys */
size_t ts_size = strlen(timestamp) + 1;
size_t gpu_name_size = strlen(gpu_name) + 1;
cache->driver_keys_blob_size = ts_size;
cache->driver_keys_blob_size += gpu_name_size;
/* We sometimes store entire structs that contain pointers in the cache,
 * so use the pointer size as a key to avoid hard-to-debug issues.
*/
diff --git a/src/util/u_queue.c b/src/util/u_queue.c
index 99de34c..32edb5e 100644
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -140,20 +140,35 @@ util_queue_thread_func(void *input)
int thread_index = ((struct thread_input*)input)->thread_index;
free(input);
if (queue->name) {
char name[16];
util_snprintf(name, sizeof(name), "%s:%i", queue->name, thread_index);
u_thread_setname(name);
}
+ if (queue->flags & UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY) {
+#if defined(__linux__)
+ struct sched_param sched_param = {0};
+
+ /* The nice() function can only set a maximum of 19.
+ * SCHED_IDLE is the same as nice = 20.
+ *
+ * Note that Linux only allows decreasing the priority. The original
+ * priority can't be restored.
+ */
+ pthread_setschedparam(queue->threads[thread_index], SCHED_IDLE,
+ &sched_param);
+#endif
+ }
+
while (1) {
struct util_queue_job job;
mtx_lock(&queue->lock);
assert(queue->num_queued >= 0 && queue->num_queued <= queue->max_jobs);
/* wait if the queue is empty */
while (!queue->kill_threads && queue->num_queued == 0)
cnd_wait(&queue->has_queued_cond, &queue->lock);
@@ -190,27 +205,29 @@ util_queue_thread_func(void *input)
queue->read_idx = (queue->read_idx + queue->num_queued) % queue->max_jobs;
queue->num_queued = 0;
mtx_unlock(&queue->lock);
return 0;
}
bool
util_queue_init(struct util_queue *queue,
const char *name,
unsigned max_jobs,
- unsigned num_threads)
+ unsigned num_threads,
+ unsigned flags)
{
unsigned i;
memset(queue, 0, sizeof(*queue));
queue->name = name;
queue->num_threads = num_threads;
+ queue->flags = flags;
queue->max_jobs = max_jobs;
queue->jobs = (struct util_queue_job*)
calloc(max_jobs, sizeof(struct util_queue_job));
if (!queue->jobs)
goto fail;
(void) mtx_init(&queue->lock, mtx_plain);
queue->num_queued = 0;
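(Not part of the patch.) The demotion above, and the fact that it can't
easily be undone, can be reproduced with a small standalone program; this
sketch assumes Linux with glibc and only illustrates the pthread calls
used by the worker thread:

   #define _GNU_SOURCE /* glibc only exposes SCHED_IDLE with this */
   #include <pthread.h>
   #include <sched.h>
   #include <stdio.h>

   int main(void)
   {
      /* sched_priority must be 0 for both SCHED_IDLE and SCHED_OTHER. */
      struct sched_param sched_param = {0};
      int policy;

      /* Demote the calling thread, like the queue worker does above. */
      if (pthread_setschedparam(pthread_self(), SCHED_IDLE, &sched_param) != 0)
         return 1;

      /* Read the policy back to confirm the demotion took effect. */
      pthread_getschedparam(pthread_self(), &policy, &sched_param);
      printf("SCHED_IDLE set: %s\n", policy == SCHED_IDLE ? "yes" : "no");

      /* With the default RLIMIT_NICE an unprivileged thread cannot switch
       * back to SCHED_OTHER, which is what the comment above means by the
       * original priority not being restorable.
       */
      printf("restore allowed: %s\n",
             pthread_setschedparam(pthread_self(), SCHED_OTHER,
                                   &sched_param) == 0 ? "yes" : "no");
      return 0;
   }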
diff --git a/src/util/u_queue.h b/src/util/u_queue.h
index 9876865..916802c 100644
--- a/src/util/u_queue.h
+++ b/src/util/u_queue.h
@@ -35,20 +35,22 @@
#include <string.h>
#include "util/list.h"
#include "util/u_thread.h"
#ifdef __cplusplus
extern "C" {
#endif
+#define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY (1 << 0)
+
/* Job completion fence.
* Put this into your job structure.
*/
struct util_queue_fence {
mtx_t mutex;
cnd_t cond;
int signalled;
};
typedef void (*util_queue_execute_func)(void *job, int thread_index);
@@ -62,33 +64,35 @@ struct util_queue_job {
/* Put this into your context. */
struct util_queue {
const char *name;
mtx_t lock;
cnd_t has_queued_cond;
cnd_t has_space_cond;
thrd_t *threads;
int num_queued;
unsigned num_threads;
+ unsigned flags;
int kill_threads;
int max_jobs;
int write_idx, read_idx; /* ring buffer pointers */
struct util_queue_job *jobs;
/* for cleanup at exit(), protected by exit_mutex */
struct list_head head;
};
bool util_queue_init(struct util_queue *queue,
const char *name,
unsigned max_jobs,
- unsigned num_threads);
+ unsigned num_threads,
+ unsigned flags);
void util_queue_destroy(struct util_queue *queue);
void util_queue_fence_init(struct util_queue_fence *fence);
void util_queue_fence_destroy(struct util_queue_fence *fence);
/* optional cleanup callback is called after fence is signaled: */
void util_queue_add_job(struct util_queue *queue,
void *job,
struct util_queue_fence *fence,
util_queue_execute_func execute,
util_queue_execute_func cleanup);
--
2.7.4