[Mesa-dev] [PATCH 6/7] radeonsi: use ready fences on all shaders, not just optimized ones
Nicolai Hähnle
nhaehnle at gmail.com
Sun Oct 22 18:33:43 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
There's a race condition between si_shader_select_with_key and
si_bind_XX_shader:
Thread 1                          Thread 2
--------                          --------
si_shader_select_with_key
  begin compiling the first
  variant
  (guarded by sel->mutex)
                                  si_bind_XX_shader
                                    select first_variant by default
                                    as state->current
                                  si_shader_select_with_key
                                    match state->current and early-out
Since thread 2 never takes sel->mutex, it may go on rendering without a
PM4 for that shader, for example.
The solution taken by this patch is to broaden the scope of
shader->optimized_ready to a fence shader->ready that applies to
all shaders. This does not hurt the fast path (if anything it makes
it faster, because we don't explicitly check is_optimized).
It will also allow reducing the scope of sel->mutex locks, but this is
deferred to a later commit for better bisectability.
Fixes dEQP-EGL.functional.sharing.gles2.multithread.simple.buffers.bufferdata_render
---
src/gallium/drivers/radeonsi/si_shader.c | 3 +
src/gallium/drivers/radeonsi/si_shader.h | 2 +-
src/gallium/drivers/radeonsi/si_state_shaders.c | 88 ++++++++++++++++++-------
3 files changed, 67 insertions(+), 26 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c3fe13deeaa..2eee8de54f2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5402,20 +5402,23 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
if (!outputs)
return NULL;
shader = CALLOC_STRUCT(si_shader);
if (!shader) {
FREE(outputs);
return NULL;
}
+ /* We can leave the fence as permanently signaled because the GS copy
+ * shader only becomes visible globally after it has been compiled. */
+ util_queue_fence_init(&shader->ready);
shader->selector = gs_selector;
shader->is_gs_copy_shader = true;
si_init_shader_ctx(&ctx, sscreen, tm);
ctx.shader = shader;
ctx.type = PIPE_SHADER_VERTEX;
builder = ctx.ac.builder;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index ebe956e709e..aec71eb07d3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -590,21 +590,21 @@ struct si_shader {
struct si_shader_part *prolog;
struct si_shader *previous_stage; /* for GFX9 */
struct si_shader_part *prolog2;
struct si_shader_part *epilog;
struct si_pm4_state *pm4;
struct r600_resource *bo;
struct r600_resource *scratch_bo;
struct si_shader_key key;
- struct util_queue_fence optimized_ready;
+ struct util_queue_fence ready;
bool compilation_failed;
bool is_monolithic;
bool is_optimized;
bool is_binary_shared;
bool is_gs_copy_shader;
/* The following data is all that's needed for binary shaders. */
struct ac_shader_binary binary;
struct si_shader_config config;
struct si_shader_info info;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 9340328a72a..8c589928b8c 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1549,20 +1549,25 @@ static bool si_check_missing_main_part(struct si_screen *sscreen,
struct si_shader_key *key)
{
struct si_shader **mainp = si_get_main_shader_part(sel, key);
if (!*mainp) {
struct si_shader *main_part = CALLOC_STRUCT(si_shader);
if (!main_part)
return false;
+ /* We can leave the fence as permanently signaled because the
+ * main part becomes visible globally only after it has been
+ * compiled. */
+ util_queue_fence_init(&main_part->ready);
+
main_part->selector = sel;
main_part->key.as_es = key->as_es;
main_part->key.as_ls = key->as_ls;
if (si_compile_tgsi_shader(sscreen, compiler_state->tm,
main_part, false,
&compiler_state->debug) != 0) {
FREE(main_part);
return false;
}
@@ -1582,70 +1587,85 @@ static int si_shader_select_with_key(struct si_screen *sscreen,
struct si_shader_selector *previous_stage_sel = NULL;
struct si_shader *current = state->current;
struct si_shader *iter, *shader = NULL;
again:
/* Check if we don't need to change anything.
* This path is also used for most shaders that don't need multiple
* variants, it will cost just a computation of the key and this
* test. */
if (likely(current &&
- memcmp(&current->key, key, sizeof(*key)) == 0 &&
- (!current->is_optimized ||
- util_queue_fence_is_signalled(&current->optimized_ready))))
+ memcmp(&current->key, key, sizeof(*key)) == 0)) {
+ if (unlikely(!util_queue_fence_is_signalled(&current->ready))) {
+ if (current->is_optimized) {
+ memset(&key->opt, 0, sizeof(key->opt));
+ goto current_not_ready;
+ }
+
+ util_queue_fence_wait(&current->ready);
+ }
+
return current->compilation_failed ? -1 : 0;
+ }
+current_not_ready:
/* This must be done before the mutex is locked, because async GS
* compilation calls this function too, and therefore must enter
* the mutex first.
*
* Only wait if we are in a draw call. Don't wait if we are
* in a compiler thread.
*/
if (thread_index < 0)
util_queue_fence_wait(&sel->ready);
mtx_lock(&sel->mutex);
/* Find the shader variant. */
for (iter = sel->first_variant; iter; iter = iter->next_variant) {
/* Don't check the "current" shader. We checked it above. */
if (current != iter &&
memcmp(&iter->key, key, sizeof(*key)) == 0) {
- /* If it's an optimized shader and its compilation has
- * been started but isn't done, use the unoptimized
- * shader so as not to cause a stall due to compilation.
- */
- if (iter->is_optimized &&
- !util_queue_fence_is_signalled(&iter->optimized_ready)) {
- memset(&key->opt, 0, sizeof(key->opt));
- mtx_unlock(&sel->mutex);
- goto again;
+ if (unlikely(!util_queue_fence_is_signalled(&iter->ready))) {
+ /* If it's an optimized shader and its compilation has
+ * been started but isn't done, use the unoptimized
+ * shader so as not to cause a stall due to compilation.
+ */
+ if (iter->is_optimized) {
+ memset(&key->opt, 0, sizeof(key->opt));
+ mtx_unlock(&sel->mutex);
+ goto again;
+ }
+
+ util_queue_fence_wait(&iter->ready);
}
if (iter->compilation_failed) {
mtx_unlock(&sel->mutex);
return -1; /* skip the draw call */
}
state->current = iter;
mtx_unlock(&sel->mutex);
return 0;
}
}
/* Build a new shader. */
shader = CALLOC_STRUCT(si_shader);
if (!shader) {
mtx_unlock(&sel->mutex);
return -ENOMEM;
}
+
+ util_queue_fence_init(&shader->ready);
+
shader->selector = sel;
shader->key = *key;
shader->compiler_ctx_state = *compiler_state;
/* If this is a merged shader, get the first shader's selector. */
if (sscreen->b.chip_class >= GFX9) {
if (sel->type == PIPE_SHADER_TESS_CTRL)
previous_stage_sel = key->part.tcs.ls;
else if (sel->type == PIPE_SHADER_GEOMETRY)
previous_stage_sel = key->part.gs.es;
@@ -1708,49 +1728,62 @@ again:
/* Monolithic-only shaders don't make a distinction between optimized
* and unoptimized. */
shader->is_monolithic =
is_pure_monolithic ||
memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
shader->is_optimized =
!is_pure_monolithic &&
memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
- if (shader->is_optimized)
- util_queue_fence_init(&shader->optimized_ready);
-
- if (!sel->last_variant) {
- sel->first_variant = shader;
- sel->last_variant = shader;
- } else {
- sel->last_variant->next_variant = shader;
- sel->last_variant = shader;
- }
/* If it's an optimized shader, compile it asynchronously. */
if (shader->is_optimized &&
!is_pure_monolithic &&
thread_index < 0) {
/* Compile it asynchronously. */
util_queue_add_job(&sscreen->shader_compiler_queue_low_priority,
- shader, &shader->optimized_ready,
+ shader, &shader->ready,
si_build_shader_variant_low_priority, NULL);
+ /* Add only after the ready fence was reset, to guard against a
+ * race with si_bind_XX_shader. */
+ if (!sel->last_variant) {
+ sel->first_variant = shader;
+ sel->last_variant = shader;
+ } else {
+ sel->last_variant->next_variant = shader;
+ sel->last_variant = shader;
+ }
+
/* Use the default (unoptimized) shader for now. */
memset(&key->opt, 0, sizeof(key->opt));
mtx_unlock(&sel->mutex);
goto again;
}
+ /* Reset the fence before adding to the variant list. */
+ util_queue_fence_reset(&shader->ready);
+
+ if (!sel->last_variant) {
+ sel->first_variant = shader;
+ sel->last_variant = shader;
+ } else {
+ sel->last_variant->next_variant = shader;
+ sel->last_variant = shader;
+ }
+
assert(!shader->is_optimized);
si_build_shader_variant(shader, thread_index, false);
+ util_queue_fence_signal(&shader->ready);
+
if (!shader->compilation_failed)
state->current = shader;
mtx_unlock(&sel->mutex);
return shader->compilation_failed ? -1 : 0;
}
static int si_shader_select(struct pipe_context *ctx,
struct si_shader_ctx_state *state,
struct si_compiler_ctx_state *compiler_state)
@@ -1826,20 +1859,24 @@ static void si_init_shader_selector_async(void *job, int thread_index)
*/
if (!sscreen->use_monolithic_shaders) {
struct si_shader *shader = CALLOC_STRUCT(si_shader);
void *tgsi_binary = NULL;
if (!shader) {
fprintf(stderr, "radeonsi: can't allocate a main shader part\n");
return;
}
+ /* We can leave the fence signaled because use of the default
+ * main part is guarded by the selector's ready fence. */
+ util_queue_fence_init(&shader->ready);
+
shader->selector = sel;
si_parse_next_shader_property(&sel->info,
sel->so.num_outputs != 0,
&shader->key);
if (sel->tokens)
tgsi_binary = si_get_tgsi_binary(sel);
/* Try to load the shader from the shader cache. */
mtx_lock(&sscreen->shader_cache_mutex);
@@ -2439,24 +2476,25 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]))
si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
si_set_active_descriptors_for_shader(sctx, sel);
}
static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
{
if (shader->is_optimized) {
util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
- &shader->optimized_ready);
- util_queue_fence_destroy(&shader->optimized_ready);
+ &shader->ready);
}
+ util_queue_fence_destroy(&shader->ready);
+
if (shader->pm4) {
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
if (shader->key.as_ls) {
assert(sctx->b.chip_class <= VI);
si_pm4_delete_state(sctx, ls, shader->pm4);
} else if (shader->key.as_es) {
assert(sctx->b.chip_class <= VI);
si_pm4_delete_state(sctx, es, shader->pm4);
} else {
--
2.11.0
More information about the mesa-dev
mailing list