[Mesa-dev] [PATCH 20/21] mesa: add support for threaded glCompileShader
Chia-I Wu
olvaffe at gmail.com
Sun May 4 14:37:57 PDT 2014
On Sat, May 3, 2014 at 1:59 AM, Ian Romanick <idr at freedesktop.org> wrote:
> On 04/22/2014 01:58 AM, Chia-I Wu wrote:
>> From: Chia-I Wu <olv at lunarg.com>
>>
>> Threaded glCompileShader can be enabled for a context by calling
>> _mesa_enable_glsl_threadpool. It will initialize the singleton GLSL thread
>> pool and defer glCompileShader calls to the thread pool.
>>
>> For applications to benefit from threaded glCompileShader, they have to
>> compile shaders in this fashion
>>
>> for (i = 0; i < num_shaders; i++)
>> glCompileShader(shaders[i]);
>> for (i = 0; i < num_shaders; i++)
>> glGetShaderiv(shaders[i], GL_COMPILE_STATUS, &val);
>
> I think when you try this series on some real applications, you will be
> disappointed. Eric had a pretty similar branch
> (http://cgit.freedesktop.org/~anholt/mesa/log/?h=compiler-threads), but
> it saw no benefit on any applications... because everybody does
>
> for (i = 0; i < num_shaders; i++) {
> glCompileShader(shaders[i]);
> glGetShaderiv(shaders[i], GL_COMPILE_STATUS, &val);
> }
>
> or
>
> for (i = 0; i < num_shaders; i++) {
> glCompileShader(shaders[i]);
> glAttachShader(prog, shaders[i]);
> }
>
> glLinkProgram(prog);
Yeah, I am aware of the situation with real-world applications. Only
applications that are modified to not immediately check compilation results
will get the speed up in compile times. That is why this feature needs to be
enabled through drirc. We, at LunarG, are working with major game engines
vendors to ensure this performance benefit is realized.
> I'm also curious about your test case... did you link the shaders? As
> far as I'm aware, the bulk of time spent in the compiler happens during
> linking (final optimizations and register allocation). Eric's data
> (http://lists.freedesktop.org/archives/mesa-dev/2014-April/057494.html)
> says we spend more than 2x time in linking than in compiling.
No, I did not. In my other experiment with Unigine Tropics, the
distribution of time was more like
glCompileShader: 50%
glLinkProgram FE: 25%
glLinkProgram BE: 25%
But, yeah, I will modify my test case to collect more numbers. Given
Pierre-Loup's numbers on DOTA2, we could expect much more reduction of
shader compiling/linking time if Eric's data are correct.
Despite all that, if this series is to be accepted after concerns are resolved,
what I plan to do based on it are
* defer FE work in glLinkProgram to the thread pool
* defer glCreateShaderProgramv
* defer BE work in glLinkProgram to the thread pool
The priorities will be driven by where CPU time is spent.
I am sorry I wasn't aware of the mail thread you pointed out. I would
have responded to it if I knew. I am still on the road and away from
my desktop. I will read the thread carefully when I get to sit down
and work.
>> As each glGetShaderiv call will force mesa to wait for the deferred
>> glCompileShader to complete, compiling shaders in the traditional way will
>> defeat this feature. This is also why the feature needs to be explicitly
>> enabled with _mesa_enable_glsl_threadpool.
>>
>> Signed-off-by: Chia-I Wu <olv at lunarg.com>
>> ---
>> src/glsl/glsl_parser_extras.cpp | 4 +++
>> src/glsl/threadpool.c | 72 +++++++++++++++++++++++++++++++++++++++++
>> src/glsl/threadpool.h | 9 ++++++
>> src/mesa/main/context.c | 25 ++++++++++++++
>> src/mesa/main/context.h | 3 ++
>> src/mesa/main/mtypes.h | 13 ++++++++
>> src/mesa/main/shaderapi.c | 48 +++++++++++++++++++++++++--
>> src/mesa/main/shaderobj.c | 20 ++++++++++++
>> src/mesa/main/shaderobj.h | 2 ++
>> 9 files changed, 194 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
>> index 30e284b..c035474 100644
>> --- a/src/glsl/glsl_parser_extras.cpp
>> +++ b/src/glsl/glsl_parser_extras.cpp
>> @@ -37,6 +37,7 @@ extern "C" {
>> #include "glsl_parser.h"
>> #include "ir_optimization.h"
>> #include "loop_analysis.h"
>> +#include "threadpool.h"
>>
>> /**
>> * Format a short human-readable description of the given GLSL version.
>> @@ -1567,6 +1568,8 @@ extern "C" {
>> void
>> _mesa_destroy_shader_compiler(void)
>> {
>> + _mesa_glsl_destroy_threadpool();
>> +
>> _mesa_destroy_shader_compiler_caches();
>>
>> _mesa_glsl_release_types();
>> @@ -1580,6 +1583,7 @@ _mesa_destroy_shader_compiler(void)
>> void
>> _mesa_destroy_shader_compiler_caches(void)
>> {
>> + _mesa_glsl_wait_threadpool();
>> _mesa_glsl_release_builtin_functions();
>> }
>>
>> diff --git a/src/glsl/threadpool.c b/src/glsl/threadpool.c
>> index 0aaac3a..3d54ec0 100644
>> --- a/src/glsl/threadpool.c
>> +++ b/src/glsl/threadpool.c
>> @@ -55,6 +55,7 @@ struct _mesa_threadpool_task {
>> struct _mesa_threadpool {
>> mtx_t mutex;
>> int refcnt;
>> + bool shutdown;
>>
>> enum _mesa_threadpool_control thread_control;
>> thrd_t *threads;
>> @@ -158,6 +159,12 @@ _mesa_threadpool_queue_task(struct _mesa_threadpool *pool,
>>
>> mtx_lock(&pool->mutex);
>>
>> + if (unlikely(pool->shutdown)) {
>> + mtx_unlock(&pool->mutex);
>> + free(task);
>> + return NULL;
>> + }
>> +
>> while (unlikely(pool->thread_control != MESA_THREADPOOL_NORMAL))
>> cnd_wait(&pool->thread_joined, &pool->mutex);
>>
>> @@ -297,6 +304,17 @@ _mesa_threadpool_join(struct _mesa_threadpool *pool, bool graceful)
>> }
>>
>> /**
>> + * After this call, no task can be queued.
>> + */
>> +static void
>> +_mesa_threadpool_set_shutdown(struct _mesa_threadpool *pool)
>> +{
>> + mtx_lock(&pool->mutex);
>> + pool->shutdown = true;
>> + mtx_unlock(&pool->mutex);
>> +}
>> +
>> +/**
>> * Decrease the reference count. Destroy \p pool when the reference count
>> * reaches zero.
>> */
>> @@ -392,3 +410,57 @@ _mesa_threadpool_create(int max_threads)
>>
>> return pool;
>> }
>> +
>> +static mtx_t threadpool_lock = _MTX_INITIALIZER_NP;
>> +static struct _mesa_threadpool *threadpool;
>> +
>> +/**
>> + * Get the singleton GLSL thread pool. \p max_threads is honored only by the
>> + * first call to this function.
>> + */
>> +struct _mesa_threadpool *
>> +_mesa_glsl_get_threadpool(int max_threads)
>> +{
>> + mtx_lock(&threadpool_lock);
>> + if (!threadpool)
>> + threadpool = _mesa_threadpool_create(max_threads);
>> + if (threadpool)
>> + _mesa_threadpool_ref(threadpool);
>> + mtx_unlock(&threadpool_lock);
>> +
>> + return threadpool;
>> +}
>> +
>> +/**
>> + * Wait until all tasks are completed and threads are joined.
>> + */
>> +void
>> +_mesa_glsl_wait_threadpool(void)
>> +{
>> + mtx_lock(&threadpool_lock);
>> + if (threadpool)
>> + _mesa_threadpool_join(threadpool, true);
>> + mtx_unlock(&threadpool_lock);
>> +}
>> +
>> +/**
>> + * Destroy the GLSL thread pool.
>> + */
>> +void
>> +_mesa_glsl_destroy_threadpool(void)
>> +{
>> + mtx_lock(&threadpool_lock);
>> + if (threadpool) {
>> + /*
>> + * This is called from _mesa_destroy_shader_compiler(). No new task is
>> + * allowed since this point. But contexts, who also own references to
>> + * the pool, can still complete tasks that have been queued.
>> + */
>> + _mesa_threadpool_set_shutdown(threadpool);
>> +
>> + _mesa_threadpool_join(threadpool, false);
>> + _mesa_threadpool_unref(threadpool);
>> + threadpool = NULL;
>> + }
>> + mtx_unlock(&threadpool_lock);
>> +}
>> diff --git a/src/glsl/threadpool.h b/src/glsl/threadpool.h
>> index 9447115..59ea260 100644
>> --- a/src/glsl/threadpool.h
>> +++ b/src/glsl/threadpool.h
>> @@ -55,6 +55,15 @@ bool
>> _mesa_threadpool_complete_task(struct _mesa_threadpool *pool,
>> struct _mesa_threadpool_task *task);
>>
>> +struct _mesa_threadpool *
>> +_mesa_glsl_get_threadpool(int max_threads);
>> +
>> +void
>> +_mesa_glsl_wait_threadpool(void);
>> +
>> +void
>> +_mesa_glsl_destroy_threadpool(void);
>> +
>> #ifdef __cplusplus
>> }
>> #endif
>> diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
>> index 860ae86..86d9176 100644
>> --- a/src/mesa/main/context.c
>> +++ b/src/mesa/main/context.c
>> @@ -112,6 +112,7 @@
>> #include "points.h"
>> #include "polygon.h"
>> #include "queryobj.h"
>> +#include "shaderapi.h"
>> #include "syncobj.h"
>> #include "rastpos.h"
>> #include "remap.h"
>> @@ -139,6 +140,7 @@
>> #endif
>>
>> #include "glsl_parser_extras.h"
>> +#include "threadpool.h"
>> #include <stdbool.h>
>>
>>
>> @@ -1184,6 +1186,23 @@ _mesa_create_context(gl_api api,
>> }
>> }
>>
>> +void
>> +_mesa_enable_glsl_threadpool(struct gl_context *ctx, int max_threads)
>> +{
>> + if (!ctx->ThreadPool)
>> + ctx->ThreadPool = _mesa_glsl_get_threadpool(max_threads);
>> +}
>> +
>> +static void
>> +complete_shader_task_cb(GLuint id, void *data, void *userData)
>> +{
>> + struct gl_context *ctx = (struct gl_context *) userData;
>> + struct gl_shader *sh = (struct gl_shader *) data;
>> +
>> + if (_mesa_validate_shader_target(ctx, sh->Type) &&
>> + sh->Task && sh->TaskData == (void *) ctx)
>> + _mesa_complete_shader_task(ctx, sh);
>> +}
>>
>> /**
>> * Free the data associated with the given context.
>> @@ -1202,6 +1221,12 @@ _mesa_free_context_data( struct gl_context *ctx )
>> _mesa_make_current(ctx, NULL, NULL);
>> }
>>
>> + if (ctx->ThreadPool) {
>> + _mesa_HashWalk(ctx->Shared->ShaderObjects, complete_shader_task_cb, ctx);
>> + _mesa_threadpool_unref(ctx->ThreadPool);
>> + ctx->ThreadPool = NULL;
>> + }
>> +
>> /* unreference WinSysDraw/Read buffers */
>> _mesa_reference_framebuffer(&ctx->WinSysDrawBuffer, NULL);
>> _mesa_reference_framebuffer(&ctx->WinSysReadBuffer, NULL);
>> diff --git a/src/mesa/main/context.h b/src/mesa/main/context.h
>> index 792ab4c..b23f9fa 100644
>> --- a/src/mesa/main/context.h
>> +++ b/src/mesa/main/context.h
>> @@ -118,6 +118,9 @@ _mesa_create_context(gl_api api,
>> const struct dd_function_table *driverFunctions);
>>
>> extern void
>> +_mesa_enable_glsl_threadpool(struct gl_context *ctx, int max_threads);
>> +
>> +extern void
>> _mesa_free_context_data( struct gl_context *ctx );
>>
>> extern void
>> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
>> index f94e7f6..d2ffe50 100644
>> --- a/src/mesa/main/mtypes.h
>> +++ b/src/mesa/main/mtypes.h
>> @@ -71,6 +71,8 @@ typedef GLuint64 GLbitfield64;
>> */
>> /*@{*/
>> struct _mesa_HashTable;
>> +struct _mesa_threadpool;
>> +struct _mesa_threadpool_task;
>> struct gl_attrib_node;
>> struct gl_list_extensions;
>> struct gl_meta_state;
>> @@ -2493,6 +2495,14 @@ struct gl_shader
>> */
>> unsigned LocalSize[3];
>> } Comp;
>> +
>> + /**
>> + * Deferred task of glCompileShader. We should extend the mutex, not only
>> + * to protect the deferred task, but to protect the entire gl_shader.
>> + */
>> + mtx_t Mutex;
>> + struct _mesa_threadpool_task *Task;
>> + void *TaskData;
>> };
>>
>>
>> @@ -4286,6 +4296,9 @@ struct gl_context
>> * Once this field becomes true, it is never reset to false.
>> */
>> GLboolean ShareGroupReset;
>> +
>> + /* A thread pool for threaded shader compilation */
>> + struct _mesa_threadpool *ThreadPool;
>> };
>>
>>
>> diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
>> index 1c8e6b4..3bd3b30 100644
>> --- a/src/mesa/main/shaderapi.c
>> +++ b/src/mesa/main/shaderapi.c
>> @@ -53,6 +53,7 @@
>> #include "program/prog_print.h"
>> #include "program/prog_parameter.h"
>> #include "ralloc.h"
>> +#include "threadpool.h"
>> #include <stdbool.h>
>> #include "../glsl/glsl_parser_extras.h"
>> #include "../glsl/ir.h"
>> @@ -727,6 +728,8 @@ get_shaderiv(struct gl_context *ctx, GLuint name, GLenum pname, GLint *params)
>> return;
>> }
>>
>> + _mesa_complete_shader_task(ctx, shader);
>> +
>> switch (pname) {
>> case GL_SHADER_TYPE:
>> *params = shader->Type;
>> @@ -773,6 +776,7 @@ get_shader_info_log(struct gl_context *ctx, GLuint shader, GLsizei bufSize,
>> _mesa_error(ctx, GL_INVALID_VALUE, "glGetShaderInfoLog(shader)");
>> return;
>> }
>> + _mesa_complete_shader_task(ctx, sh);
>> _mesa_copy_string(infoLog, bufSize, length, sh->InfoLog);
>> }
>>
>> @@ -806,6 +810,8 @@ shader_source(struct gl_context *ctx, GLuint shader, const GLchar *source)
>> if (!sh)
>> return;
>>
>> + _mesa_complete_shader_task(ctx, sh);
>> +
>> /* free old shader source string and install new one */
>> free((void *)sh->Source);
>> sh->Source = source;
>> @@ -815,6 +821,37 @@ shader_source(struct gl_context *ctx, GLuint shader, const GLchar *source)
>> #endif
>> }
>>
>> +static void
>> +deferred_compile_shader(void *data)
>> +{
>> + struct gl_shader *sh = (struct gl_shader *) data;
>> + struct gl_context *ctx = (struct gl_context *) sh->TaskData;
>> +
>> + _mesa_glsl_compile_shader(ctx, sh, false, false);
>> +}
>> +
>> +static bool
>> +queue_compile_shader(struct gl_context *ctx, struct gl_shader *sh)
>> +{
>> + if (!ctx->ThreadPool)
>> + return false;
>> +
>> + /* MESA_GLSL is set */
>> + if (ctx->_Shader->Flags)
>> + return false;
>> +
>> + /* context requires synchronized compiler warnings and errors */
>> + if (ctx->Debug && ctx->Debug->SyncOutput)
>> + return false;
>> +
>> + sh->TaskData = (void *) ctx;
>> + sh->Task = _mesa_threadpool_queue_task(ctx->ThreadPool,
>> + deferred_compile_shader, (void *) sh);
>> + if (!sh->Task)
>> + sh->TaskData = NULL;
>> +
>> + return (sh->Task != NULL);
>> +}
>>
>> /**
>> * Compile a shader.
>> @@ -847,10 +884,13 @@ compile_shader(struct gl_context *ctx, GLuint shaderObj)
>> fflush(stderr);
>> }
>>
>> + _mesa_complete_shader_task(ctx, sh);
>> +
>> /* this call will set the shader->CompileStatus field to indicate if
>> * compilation was successful.
>> */
>> - _mesa_glsl_compile_shader(ctx, sh, false, false);
>> + if (!queue_compile_shader(ctx, sh))
>> + _mesa_glsl_compile_shader(ctx, sh, false, false);
>>
>> if (ctx->_Shader->Flags & GLSL_LOG) {
>> _mesa_write_shader_to_file(sh);
>> @@ -873,7 +913,7 @@ compile_shader(struct gl_context *ctx, GLuint shaderObj)
>>
>> }
>>
>> - if (!sh->CompileStatus) {
>> + if (ctx->_Shader->Flags && !sh->CompileStatus) {
>> if (ctx->_Shader->Flags & GLSL_DUMP_ON_ERROR) {
>> fprintf(stderr, "GLSL source for %s shader %d:\n",
>> _mesa_shader_stage_to_string(sh->Stage), sh->Name);
>> @@ -897,6 +937,7 @@ static void
>> link_program(struct gl_context *ctx, GLuint program)
>> {
>> struct gl_shader_program *shProg;
>> + int i;
>>
>> shProg = _mesa_lookup_shader_program_err(ctx, program, "glLinkProgram");
>> if (!shProg)
>> @@ -915,6 +956,9 @@ link_program(struct gl_context *ctx, GLuint program)
>>
>> FLUSH_VERTICES(ctx, _NEW_PROGRAM);
>>
>> + for (i = 0; i < shProg->NumShaders; i++)
>> + _mesa_complete_shader_task(ctx, shProg->Shaders[i]);
>> +
>> _mesa_glsl_link_shader(ctx, shProg);
>>
>> if (shProg->LinkStatus == GL_FALSE &&
>> diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
>> index b0f0bfa..761b45d 100644
>> --- a/src/mesa/main/shaderobj.c
>> +++ b/src/mesa/main/shaderobj.c
>> @@ -41,6 +41,7 @@
>> #include "program/prog_parameter.h"
>> #include "program/hash_table.h"
>> #include "ralloc.h"
>> +#include "threadpool.h"
>>
>> /**********************************************************************/
>> /*** Shader object functions ***/
>> @@ -95,6 +96,7 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
>> void
>> _mesa_init_shader(struct gl_context *ctx, struct gl_shader *shader)
>> {
>> + mtx_init(&shader->Mutex, mtx_plain);
>> shader->RefCount = 1;
>> }
>>
>> @@ -125,9 +127,12 @@ _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type)
>> static void
>> _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh)
>> {
>> + _mesa_complete_shader_task(ctx, sh);
>> +
>> free((void *)sh->Source);
>> free(sh->Label);
>> _mesa_reference_program(ctx, &sh->Program, NULL);
>> + mtx_destroy(&sh->Mutex);
>> ralloc_free(sh);
>> }
>>
>> @@ -179,6 +184,21 @@ _mesa_lookup_shader_err(struct gl_context *ctx, GLuint name, const char *caller)
>> }
>> }
>>
>> +void
>> +_mesa_complete_shader_task(struct gl_context *ctx, struct gl_shader *sh)
>> +{
>> + mtx_lock(&sh->Mutex);
>> +
>> + if (sh->Task) {
>> + struct gl_context *task_ctx = (struct gl_context *) sh->TaskData;
>> +
>> + _mesa_threadpool_complete_task(task_ctx->ThreadPool, sh->Task);
>> + sh->Task = NULL;
>> + sh->TaskData = NULL;
>> + }
>> +
>> + mtx_unlock(&sh->Mutex);
>> +}
>>
>>
>> /**********************************************************************/
>> diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h
>> index fae8be8..cc590bc 100644
>> --- a/src/mesa/main/shaderobj.h
>> +++ b/src/mesa/main/shaderobj.h
>> @@ -59,6 +59,8 @@ _mesa_lookup_shader(struct gl_context *ctx, GLuint name);
>> extern struct gl_shader *
>> _mesa_lookup_shader_err(struct gl_context *ctx, GLuint name, const char *caller);
>>
>> +extern void
>> +_mesa_complete_shader_task(struct gl_context *ctx, struct gl_shader *sh);
>>
>>
>> extern void
>
--
olv at LunarG.com
More information about the mesa-dev
mailing list