[Mesa-dev] [PATCH 15/20] radeonsi: add infrastr. for compiling optimized shader variants asynchronously
Marek Olšák
maraeo at gmail.com
Wed Nov 16 18:38:38 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_shader.h | 7 ++
src/gallium/drivers/radeonsi/si_state_shaders.c | 136 ++++++++++++++++++------
2 files changed, 109 insertions(+), 34 deletions(-)
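For readers skimming the diff below: a variant whose key has non-zero "opt" bits is compiled on the screen's shader_compiler_queue instead of inside the draw call, and the draw call falls back to the plain key until the fence signals. The following is a minimal, self-contained C sketch of that fallback loop, not the driver's real code; every name in it (struct key, struct variant, select_variant, queue_optimized_build) is invented purely for illustration.

/* Simplified stand-in for the selection fallback added to
 * si_shader_select_with_key(); illustration only. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct key {
	unsigned mono;	/* stand-in for si_shader_key::mono */
	unsigned opt;	/* stand-in for si_shader_key::opt */
};

struct variant {
	struct key key;
	bool optimized_ready;	/* stand-in for the util_queue fence */
	struct variant *next;
};

/* Pretend to hand the build to a compiler thread: the "fence" stays unsignalled. */
static void queue_optimized_build(struct variant *v)
{
	v->optimized_ready = false;
}

static struct variant *select_variant(struct variant *list, struct key *key)
{
	struct variant *v;
again:
	for (v = list; v; v = v->next) {
		if (memcmp(&v->key, key, sizeof(*key)) != 0)
			continue;
		/* An optimized variant that is still compiling must not stall
		 * the draw call: drop the optimization bits and search again,
		 * which picks up the plain (unoptimized) variant instead. */
		if (key->opt && !v->optimized_ready) {
			key->opt = 0;
			goto again;
		}
		return v;
	}
	/* Not found: the real code builds a new variant here and, for an
	 * optimized key, queues it asynchronously before retrying with opt == 0. */
	return NULL;
}

int main(void)
{
	struct variant unopt = { .key = { 0, 0 }, .optimized_ready = true };
	struct variant opt = { .key = { 0, 1 }, .next = &unopt };
	struct key wanted = { 0, 1 };

	queue_optimized_build(&opt);	/* optimized build still in flight */
	struct variant *v = select_variant(&opt, &wanted);
	printf("selected the %s variant\n", v == &unopt ? "unoptimized" : "optimized");
	return 0;
}

In the patch itself, the "ready" check is util_queue_fence_is_signalled() on shader->optimized_ready and the background build goes through util_queue_add_job(), so a draw call never blocks on the compiler queue; only si_delete_shader() waits on the fence before freeing the variant.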
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 2ed0cb7..38aa361 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -428,20 +428,24 @@ struct si_shader_key {
/* Flags for monolithic compilation only. */
union {
struct {
/* One pair of bits for every input: SI_FIX_FETCH_* enums. */
uint32_t fix_fetch;
} vs;
struct {
uint64_t inputs_to_copy; /* for fixed-func TCS */
} tcs;
} mono;
+
+ /* Optimization flags for asynchronous compilation only. */
+ union {
+ } opt;
};
struct si_shader_config {
unsigned num_sgprs;
unsigned num_vgprs;
unsigned spilled_sgprs;
unsigned spilled_vgprs;
unsigned lds_size;
unsigned spi_ps_input_ena;
unsigned spi_ps_input_addr;
@@ -477,21 +481,24 @@ struct si_shader {
struct si_shader_selector *selector;
struct si_shader *next_variant;
struct si_shader_part *prolog;
struct si_shader_part *epilog;
struct si_pm4_state *pm4;
struct r600_resource *bo;
struct r600_resource *scratch_bo;
struct si_shader_key key;
+ struct util_queue_fence optimized_ready;
+ bool compilation_failed;
bool is_monolithic;
+ bool is_optimized;
bool is_binary_shared;
bool is_gs_copy_shader;
/* The following data is all that's needed for binary shaders. */
struct radeon_shader_binary binary;
struct si_shader_config config;
struct si_shader_info info;
/* Shader key + LLVM IR + disassembly + statistics.
* Generated for debug contexts only.
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 72d0518..00ccbbd 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1008,118 +1008,182 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
}
key->part.ps.epilog.alpha_func = si_get_alpha_test_func(sctx);
break;
}
default:
assert(0);
}
}
+static void si_build_shader_variant(void *job, int thread_index)
+{
+ struct si_shader *shader = (struct si_shader *)job;
+ struct si_shader_selector *sel = shader->selector;
+ struct si_screen *sscreen = sel->screen;
+ LLVMTargetMachineRef tm;
+ struct pipe_debug_callback *debug = &sel->debug;
+ int r;
+
+ if (thread_index >= 0) {
+ assert(thread_index < ARRAY_SIZE(sscreen->tm));
+ tm = sscreen->tm[thread_index];
+ if (!debug->async)
+ debug = NULL;
+ } else {
+ tm = sel->tm;
+ }
+
+ r = si_shader_create(sscreen, tm, shader, debug);
+ if (unlikely(r)) {
+ R600_ERR("Failed to build shader variant (type=%u) %d\n",
+ sel->type, r);
+ shader->compilation_failed = true;
+ return;
+ }
+
+ if (sel->is_debug_context) {
+ FILE *f = open_memstream(&shader->shader_log,
+ &shader->shader_log_size);
+ if (f) {
+ si_shader_dump(sscreen, shader, NULL, sel->type, f);
+ fclose(f);
+ }
+ }
+
+ si_shader_init_pm4_state(sscreen, shader);
+}
+
/* Select the hw shader variant depending on the current state. */
static int si_shader_select_with_key(struct si_screen *sscreen,
struct si_shader_ctx_state *state,
struct si_shader_key *key,
- LLVMTargetMachineRef tm,
- struct pipe_debug_callback *debug,
- bool wait,
- bool is_debug_context)
+ int thread_index)
{
static const struct si_shader_key zeroed;
struct si_shader_selector *sel = state->cso;
struct si_shader *current = state->current;
struct si_shader *iter, *shader = NULL;
- int r;
-
+again:
/* Check if we don't need to change anything.
* This path is also used for most shaders that don't need multiple
* variants, it will cost just a computation of the key and this
* test. */
- if (likely(current && memcmp(&current->key, key, sizeof(*key)) == 0))
+ if (likely(current &&
+ memcmp(&current->key, key, sizeof(*key)) == 0 &&
+ (!current->is_optimized ||
+ util_queue_fence_is_signalled(&current->optimized_ready))))
return 0;
/* This must be done before the mutex is locked, because async GS
* compilation calls this function too, and therefore must enter
* the mutex first.
+ *
+ * Only wait if we are in a draw call. Don't wait if we are
+ * in a compiler thread.
*/
- if (wait)
+ if (thread_index < 0)
util_queue_job_wait(&sel->ready);
pipe_mutex_lock(sel->mutex);
/* Find the shader variant. */
for (iter = sel->first_variant; iter; iter = iter->next_variant) {
/* Don't check the "current" shader. We checked it above. */
if (current != iter &&
memcmp(&iter->key, key, sizeof(*key)) == 0) {
+ /* If it's an optimized shader and its compilation has
+ * been started but isn't done, use the unoptimized
+ * shader so as not to cause a stall due to compilation.
+ */
+ if (iter->is_optimized &&
+ !util_queue_fence_is_signalled(&iter->optimized_ready)) {
+ memset(&key->opt, 0, sizeof(key->opt));
+ pipe_mutex_unlock(sel->mutex);
+ goto again;
+ }
+
+ if (iter->compilation_failed) {
+ pipe_mutex_unlock(sel->mutex);
+ return -1; /* skip the draw call */
+ }
+
state->current = iter;
pipe_mutex_unlock(sel->mutex);
return 0;
}
}
/* Build a new shader. */
shader = CALLOC_STRUCT(si_shader);
if (!shader) {
pipe_mutex_unlock(sel->mutex);
return -ENOMEM;
}
shader->selector = sel;
shader->key = *key;
+
+ /* Monolithic-only shaders don't make a distinction between optimized
+ * and unoptimized. */
shader->is_monolithic =
!sel->main_shader_part ||
sel->main_shader_part->key.as_ls != key->as_ls ||
sel->main_shader_part->key.as_es != key->as_es ||
+ memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0 ||
memcmp(&key->mono, &zeroed.mono, sizeof(key->mono)) != 0;
- r = si_shader_create(sscreen, tm, shader, debug);
- if (unlikely(r)) {
- R600_ERR("Failed to build shader variant (type=%u) %d\n",
- sel->type, r);
- FREE(shader);
- pipe_mutex_unlock(sel->mutex);
- return r;
- }
-
- if (is_debug_context) {
- FILE *f = open_memstream(&shader->shader_log,
- &shader->shader_log_size);
- if (f) {
- si_shader_dump(sscreen, shader, NULL, sel->type, f);
- fclose(f);
- }
- }
-
- si_shader_init_pm4_state(sscreen, shader);
+ shader->is_optimized =
+ !sscreen->use_monolithic_shaders &&
+ memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
+ if (shader->is_optimized)
+ util_queue_fence_init(&shader->optimized_ready);
if (!sel->last_variant) {
sel->first_variant = shader;
sel->last_variant = shader;
} else {
sel->last_variant->next_variant = shader;
sel->last_variant = shader;
}
- state->current = shader;
+
+ /* If it's an optimized shader, compile it asynchronously. */
+ if (shader->is_optimized &&
+ thread_index < 0) {
+ /* Compile it asynchronously. */
+ util_queue_add_job(&sscreen->shader_compiler_queue,
+ shader, &shader->optimized_ready,
+ si_build_shader_variant, NULL);
+
+ /* Use the default (unoptimized) shader for now. */
+ memset(&key->opt, 0, sizeof(key->opt));
+ pipe_mutex_unlock(sel->mutex);
+ goto again;
+ }
+
+ assert(!shader->is_optimized);
+ si_build_shader_variant(shader, thread_index);
+
+ if (!shader->compilation_failed)
+ state->current = shader;
+
pipe_mutex_unlock(sel->mutex);
- return 0;
+ return shader->compilation_failed ? -1 : 0;
}
static int si_shader_select(struct pipe_context *ctx,
struct si_shader_ctx_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_key key;
si_shader_selector_key(ctx, state->cso, &key);
- return si_shader_select_with_key(sctx->screen, state, &key,
- sctx->tm, &sctx->b.debug, true,
- sctx->is_debug);
+ return si_shader_select_with_key(sctx->screen, state, &key, -1);
}
static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
struct si_shader_key *key)
{
unsigned next_shader = info->properties[TGSI_PROPERTY_NEXT_SHADER];
switch (info->processor) {
case PIPE_SHADER_VERTEX:
switch (next_shader) {
@@ -1240,22 +1304,21 @@ void si_init_shader_selector_async(void *job, int thread_index)
sel->info.uses_linear_center &&
sel->info.uses_linear_centroid;
key.part.ps.epilog.alpha_func = PIPE_FUNC_ALWAYS;
for (i = 0; i < 8; i++)
if (sel->info.colors_written & (1 << i))
key.part.ps.epilog.spi_shader_col_format |=
V_028710_SPI_SHADER_FP16_ABGR << (i * 4);
break;
}
- if (si_shader_select_with_key(sscreen, &state, &key, tm, debug,
- false, sel->is_debug_context))
+ if (si_shader_select_with_key(sscreen, &state, &key, thread_index))
fprintf(stderr, "radeonsi: can't create a monolithic shader\n");
}
/* The GS copy shader is always pre-compiled. */
if (sel->type == PIPE_SHADER_GEOMETRY) {
sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, tm, sel, debug);
if (!sel->gs_copy_shader) {
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
return;
}
@@ -1517,20 +1580,25 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
return;
sctx->ps_shader.cso = sel;
sctx->ps_shader.current = sel ? sel->first_variant : NULL;
sctx->do_update_shaders = true;
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
}
static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
{
+ if (shader->is_optimized) {
+ util_queue_job_wait(&shader->optimized_ready);
+ util_queue_fence_destroy(&shader->optimized_ready);
+ }
+
if (shader->pm4) {
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
if (shader->key.as_ls)
si_pm4_delete_state(sctx, ls, shader->pm4);
else if (shader->key.as_es)
si_pm4_delete_state(sctx, es, shader->pm4);
else
si_pm4_delete_state(sctx, vs, shader->pm4);
break;
--
2.7.4