[Mesa-dev] [PATCH 6/6] radeonsi: always use async compiles when creating shader/compute states

Sun Oct 29 02:28:26 UTC 2017

For the series:

Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Sun, Oct 22, 2017 at 8:45 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> With Gallium threaded contexts, creating shader/compute states is
> effectively a screen operation, so we should not use context state.
>
> In particular, this allows us to avoid using the context's LLVM
> TargetMachine.
>
> Using context state here isn't an issue yet because u_threaded_context
> filters out non-async debug callbacks, and we disable threaded contexts
> for debug contexts. However, we may want to change that in the future.
> ---
>  src/gallium/drivers/radeonsi/si_compute.c       | 42 +++++++++++++++----------
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 42 +++++++++++++++----------
>  2 files changed, 50 insertions(+), 34 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index e55988af4cc..3eee907d44b 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -16,20 +16,21 @@
>   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>   * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
>   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
>   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>   * USE OR OTHER DEALINGS IN THE SOFTWARE.
>   *
>   */
>
>  #include "tgsi/tgsi_parse.h"
> +#include "util/u_async_debug.h"
>  #include "util/u_memory.h"
>  #include "util/u_upload_mgr.h"
>
>  #include "amd_kernel_code_t.h"
>  #include "radeon/r600_cs.h"
>  #include "si_pipe.h"
>  #include "si_compute.h"
>  #include "sid.h"
>
>  struct dispatch_packet {
> @@ -77,28 +78,24 @@ static void code_object_to_config(const amd_kernel_code_t *code_object,
>
>  /* Asynchronous compute shader compilation. */
>  static void si_create_compute_state_async(void *job, int thread_index)
>  {
>         struct si_compute *program = (struct si_compute *)job;
>         struct si_shader *shader = &program->shader;
>         struct si_shader_selector sel;
>         LLVMTargetMachineRef tm;
>         struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;
>
> -       if (thread_index >= 0) {
> -               assert(thread_index < ARRAY_SIZE(program->screen->tm));
> -               tm = program->screen->tm[thread_index];
> -               if (!debug->async)
> -                       debug = NULL;
> -       } else {
> -               tm = program->compiler_ctx_state.tm;
> -       }
> +       assert(!debug->debug_message || debug->async);
> +       assert(thread_index >= 0);
> +       assert(thread_index < ARRAY_SIZE(program->screen->tm));
> +       tm = program->screen->tm[thread_index];
>
>         memset(&sel, 0, sizeof(sel));
>
>         sel.screen = program->screen;
>         tgsi_scan_shader(program->tokens, &sel.info);
>         sel.tokens = program->tokens;
>         sel.type = PIPE_SHADER_COMPUTE;
>         sel.local_size = program->local_size;
>         si_get_active_slot_masks(&sel.info,
>                                  &program->active_const_and_shader_buffers,
> @@ -160,34 +157,45 @@ static void *si_create_compute_state(
>         program->use_code_object_v2 = HAVE_LLVM >= 0x0400 &&
>                                         cso->ir_type == PIPE_SHADER_IR_NATIVE;
>
>         if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
>                 program->tokens = tgsi_dup_tokens(cso->prog);
>                 if (!program->tokens) {
>                         FREE(program);
>                         return NULL;
>                 }
>
> -               program->compiler_ctx_state.tm = sctx->tm;
>                 program->compiler_ctx_state.debug = sctx->debug;
>                 program->compiler_ctx_state.is_debug_context = sctx->is_debug;
>                 p_atomic_inc(&sscreen->b.num_shaders_created);
>                 util_queue_fence_init(&program->ready);
>
> -               if ((sctx->debug.debug_message && !sctx->debug.async) ||
> -                   sctx->is_debug ||
> -                   si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
> -                       si_create_compute_state_async(program, -1);
> -               else
> -                       util_queue_add_job(&sscreen->shader_compiler_queue,
> -                                          program, &program->ready,
> -                                          si_create_compute_state_async, NULL);
> +               struct util_async_debug_callback async_debug;
> +               bool wait =
> +                       (sctx->debug.debug_message && !sctx->debug.async) ||
> +                       sctx->is_debug ||
> +                       si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE);
> +
> +               if (wait) {
> +                       u_async_debug_init(&async_debug);
> +                       program->compiler_ctx_state.debug = async_debug.base;
> +               }
> +
> +               util_queue_add_job(&sscreen->shader_compiler_queue,
> +                                  program, &program->ready,
> +                                  si_create_compute_state_async, NULL);
> +
> +               if (wait) {
> +                       util_queue_fence_wait(&program->ready);
> +                       u_async_debug_drain(&async_debug, &sctx->debug);
> +                       u_async_debug_cleanup(&async_debug);
> +               }
>         } else {
>                 const struct pipe_llvm_program_header *header;
>                 const char *code;
>                 header = cso->prog;
>                 code = cso->prog + sizeof(struct pipe_llvm_program_header);
>
>                 ac_elf_read(code, header->num_bytes, &program->shader.binary);
>                 if (program->use_code_object_v2) {
>                         const amd_kernel_code_t *code_object =
>                                 si_compute_get_code_object(program, 0);
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 1f6bb02a983..45b36878715 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -27,20 +27,21 @@
>
>  #include "si_pipe.h"
>  #include "sid.h"
>  #include "gfx9d.h"
>  #include "radeon/r600_cs.h"
>
>  #include "tgsi/tgsi_parse.h"
>  #include "tgsi/tgsi_ureg.h"
>  #include "util/hash_table.h"
>  #include "util/crc32.h"
> +#include "util/u_async_debug.h"
>  #include "util/u_memory.h"
>  #include "util/u_prim.h"
>
>  #include "util/disk_cache.h"
>  #include "util/mesa-sha1.h"
>  #include "ac_exp_param.h"
>
>  /* SHADER_CACHE */
>
>  /**
> @@ -1839,28 +1840,24 @@ static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
>   * there is no way to report compile failures to applications.
>   */
>  static void si_init_shader_selector_async(void *job, int thread_index)
>  {
>         struct si_shader_selector *sel = (struct si_shader_selector *)job;
>         struct si_screen *sscreen = sel->screen;
>         LLVMTargetMachineRef tm;
>         struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
>         unsigned i;
>
> -       if (thread_index >= 0) {
> -               assert(thread_index < ARRAY_SIZE(sscreen->tm));
> -               tm = sscreen->tm[thread_index];
> -               if (!debug->async)
> -                       debug = NULL;
> -       } else {
> -               tm = sel->compiler_ctx_state.tm;
> -       }
> +       assert(!debug->debug_message || debug->async);
> +       assert(thread_index >= 0);
> +       assert(thread_index < ARRAY_SIZE(sscreen->tm));
> +       tm = sscreen->tm[thread_index];
>
>         /* Compile the main shader part for use with a prolog and/or epilog.
>          * If this fails, the driver will try to compile a monolithic shader
>          * on demand.
>          */
>         if (!sscreen->use_monolithic_shaders) {
>                 struct si_shader *shader = CALLOC_STRUCT(si_shader);
>                 void *tgsi_binary = NULL;
>
>                 if (!shader) {
> @@ -2041,21 +2038,20 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
>         struct si_screen *sscreen = (struct si_screen *)ctx->screen;
>         struct si_context *sctx = (struct si_context*)ctx;
>         struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
>         int i;
>
>         if (!sel)
>                 return NULL;
>
>         pipe_reference_init(&sel->reference, 1);
>         sel->screen = sscreen;
> -       sel->compiler_ctx_state.tm = sctx->tm;
>         sel->compiler_ctx_state.debug = sctx->debug;
>         sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
>
>         sel->so = state->stream_output;
>
>         if (state->type == PIPE_SHADER_IR_TGSI) {
>                 sel->tokens = tgsi_dup_tokens(state->tokens);
>                 if (!sel->tokens) {
>                         FREE(sel);
>                         return NULL;
> @@ -2265,28 +2261,40 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
>                 sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) |
>                                           S_02880C_EXEC_ON_HIER_FAIL(1);
>         } else {
>                 /* Case 1. */
>                 sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
>         }
>
>         (void) mtx_init(&sel->mutex, mtx_plain);
>         util_queue_fence_init(&sel->ready);
>
> -       if ((sctx->debug.debug_message && !sctx->debug.async) ||
> -           sctx->is_debug ||
> -           si_can_dump_shader(&sscreen->b, sel->info.processor))
> -               si_init_shader_selector_async(sel, -1);
> -       else
> -               util_queue_add_job(&sscreen->shader_compiler_queue, sel,
> -                                   &sel->ready, si_init_shader_selector_async,
> -                                   NULL);
> +       struct util_async_debug_callback async_debug;
> +       bool wait =
> +               (sctx->debug.debug_message && !sctx->debug.async) ||
> +               sctx->is_debug ||
> +               si_can_dump_shader(&sscreen->b, sel->info.processor);
> +
> +       if (wait) {
> +               u_async_debug_init(&async_debug);
> +               sel->compiler_ctx_state.debug = async_debug.base;
> +       }
> +
> +       util_queue_add_job(&sscreen->shader_compiler_queue, sel,
> +                          &sel->ready, si_init_shader_selector_async,
> +                          NULL);
> +
> +       if (wait) {
> +               util_queue_fence_wait(&sel->ready);
> +               u_async_debug_drain(&async_debug, &sctx->debug);
> +               u_async_debug_cleanup(&async_debug);
> +       }
>
>         return sel;
>  }
>
>  static void si_update_streamout_state(struct si_context *sctx)
>  {
>         struct si_shader_selector *shader_with_so = si_get_vs(sctx)->cso;
>
>         if (!shader_with_so)
>                 return;
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev