[Mesa-dev] [PATCH 6/6] radeonsi: always use async compiles when creating shader/compute states
Marek Olšák
maraeo at gmail.com
Sun Oct 29 02:28:26 UTC 2017
For the series:
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Marek
On Sun, Oct 22, 2017 at 8:45 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> With Gallium threaded contexts, creating shader/compute states is
> effectively a screen operation, so we should not use context state.
>
> In particular, this allows us to avoid using the context's LLVM
> TargetMachine.
>
> This isn't an issue yet because u_threaded_context filters out non-async
> debug callbacks, and we disable threaded contexts for debug contexts.
> However, we may want to change that in the future.
> ---
> src/gallium/drivers/radeonsi/si_compute.c | 42 +++++++++++++++----------
> src/gallium/drivers/radeonsi/si_state_shaders.c | 42 +++++++++++++++----------
> 2 files changed, 50 insertions(+), 34 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index e55988af4cc..3eee907d44b 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -16,20 +16,21 @@
> * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
> * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> * USE OR OTHER DEALINGS IN THE SOFTWARE.
> *
> */
>
> #include "tgsi/tgsi_parse.h"
> +#include "util/u_async_debug.h"
> #include "util/u_memory.h"
> #include "util/u_upload_mgr.h"
>
> #include "amd_kernel_code_t.h"
> #include "radeon/r600_cs.h"
> #include "si_pipe.h"
> #include "si_compute.h"
> #include "sid.h"
>
> struct dispatch_packet {
> @@ -77,28 +78,24 @@ static void code_object_to_config(const amd_kernel_code_t *code_object,
>
> /* Asynchronous compute shader compilation. */
> static void si_create_compute_state_async(void *job, int thread_index)
> {
> struct si_compute *program = (struct si_compute *)job;
> struct si_shader *shader = &program->shader;
> struct si_shader_selector sel;
> LLVMTargetMachineRef tm;
> struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;
>
> - if (thread_index >= 0) {
> - assert(thread_index < ARRAY_SIZE(program->screen->tm));
> - tm = program->screen->tm[thread_index];
> - if (!debug->async)
> - debug = NULL;
> - } else {
> - tm = program->compiler_ctx_state.tm;
> - }
> + assert(!debug->debug_message || debug->async);
> + assert(thread_index >= 0);
> + assert(thread_index < ARRAY_SIZE(program->screen->tm));
> + tm = program->screen->tm[thread_index];
>
> memset(&sel, 0, sizeof(sel));
>
> sel.screen = program->screen;
> tgsi_scan_shader(program->tokens, &sel.info);
> sel.tokens = program->tokens;
> sel.type = PIPE_SHADER_COMPUTE;
> sel.local_size = program->local_size;
> si_get_active_slot_masks(&sel.info,
> &program->active_const_and_shader_buffers,
> @@ -160,34 +157,45 @@ static void *si_create_compute_state(
> program->use_code_object_v2 = HAVE_LLVM >= 0x0400 &&
> cso->ir_type == PIPE_SHADER_IR_NATIVE;
>
> if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
> program->tokens = tgsi_dup_tokens(cso->prog);
> if (!program->tokens) {
> FREE(program);
> return NULL;
> }
>
> - program->compiler_ctx_state.tm = sctx->tm;
> program->compiler_ctx_state.debug = sctx->debug;
> program->compiler_ctx_state.is_debug_context = sctx->is_debug;
> p_atomic_inc(&sscreen->b.num_shaders_created);
> util_queue_fence_init(&program->ready);
>
> - if ((sctx->debug.debug_message && !sctx->debug.async) ||
> - sctx->is_debug ||
> - si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
> - si_create_compute_state_async(program, -1);
> - else
> - util_queue_add_job(&sscreen->shader_compiler_queue,
> - program, &program->ready,
> - si_create_compute_state_async, NULL);
> + struct util_async_debug_callback async_debug;
> + bool wait =
> + (sctx->debug.debug_message && !sctx->debug.async) ||
> + sctx->is_debug ||
> + si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE);
> +
> + if (wait) {
> + u_async_debug_init(&async_debug);
> + program->compiler_ctx_state.debug = async_debug.base;
> + }
> +
> + util_queue_add_job(&sscreen->shader_compiler_queue,
> + program, &program->ready,
> + si_create_compute_state_async, NULL);
> +
> + if (wait) {
> + util_queue_fence_wait(&program->ready);
> + u_async_debug_drain(&async_debug, &sctx->debug);
> + u_async_debug_cleanup(&async_debug);
> + }
> } else {
> const struct pipe_llvm_program_header *header;
> const char *code;
> header = cso->prog;
> code = cso->prog + sizeof(struct pipe_llvm_program_header);
>
> ac_elf_read(code, header->num_bytes, &program->shader.binary);
> if (program->use_code_object_v2) {
> const amd_kernel_code_t *code_object =
> si_compute_get_code_object(program, 0);
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 1f6bb02a983..45b36878715 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -27,20 +27,21 @@
>
> #include "si_pipe.h"
> #include "sid.h"
> #include "gfx9d.h"
> #include "radeon/r600_cs.h"
>
> #include "tgsi/tgsi_parse.h"
> #include "tgsi/tgsi_ureg.h"
> #include "util/hash_table.h"
> #include "util/crc32.h"
> +#include "util/u_async_debug.h"
> #include "util/u_memory.h"
> #include "util/u_prim.h"
>
> #include "util/disk_cache.h"
> #include "util/mesa-sha1.h"
> #include "ac_exp_param.h"
>
> /* SHADER_CACHE */
>
> /**
> @@ -1839,28 +1840,24 @@ static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
> * there is no way to report compile failures to applications.
> */
> static void si_init_shader_selector_async(void *job, int thread_index)
> {
> struct si_shader_selector *sel = (struct si_shader_selector *)job;
> struct si_screen *sscreen = sel->screen;
> LLVMTargetMachineRef tm;
> struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
> unsigned i;
>
> - if (thread_index >= 0) {
> - assert(thread_index < ARRAY_SIZE(sscreen->tm));
> - tm = sscreen->tm[thread_index];
> - if (!debug->async)
> - debug = NULL;
> - } else {
> - tm = sel->compiler_ctx_state.tm;
> - }
> + assert(!debug->debug_message || debug->async);
> + assert(thread_index >= 0);
> + assert(thread_index < ARRAY_SIZE(sscreen->tm));
> + tm = sscreen->tm[thread_index];
>
> /* Compile the main shader part for use with a prolog and/or epilog.
> * If this fails, the driver will try to compile a monolithic shader
> * on demand.
> */
> if (!sscreen->use_monolithic_shaders) {
> struct si_shader *shader = CALLOC_STRUCT(si_shader);
> void *tgsi_binary = NULL;
>
> if (!shader) {
> @@ -2041,21 +2038,20 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
> struct si_screen *sscreen = (struct si_screen *)ctx->screen;
> struct si_context *sctx = (struct si_context*)ctx;
> struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
> int i;
>
> if (!sel)
> return NULL;
>
> pipe_reference_init(&sel->reference, 1);
> sel->screen = sscreen;
> - sel->compiler_ctx_state.tm = sctx->tm;
> sel->compiler_ctx_state.debug = sctx->debug;
> sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
>
> sel->so = state->stream_output;
>
> if (state->type == PIPE_SHADER_IR_TGSI) {
> sel->tokens = tgsi_dup_tokens(state->tokens);
> if (!sel->tokens) {
> FREE(sel);
> return NULL;
> @@ -2265,28 +2261,40 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
> sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) |
> S_02880C_EXEC_ON_HIER_FAIL(1);
> } else {
> /* Case 1. */
> sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
> }
>
> (void) mtx_init(&sel->mutex, mtx_plain);
> util_queue_fence_init(&sel->ready);
>
> - if ((sctx->debug.debug_message && !sctx->debug.async) ||
> - sctx->is_debug ||
> - si_can_dump_shader(&sscreen->b, sel->info.processor))
> - si_init_shader_selector_async(sel, -1);
> - else
> - util_queue_add_job(&sscreen->shader_compiler_queue, sel,
> - &sel->ready, si_init_shader_selector_async,
> - NULL);
> + struct util_async_debug_callback async_debug;
> + bool wait =
> + (sctx->debug.debug_message && !sctx->debug.async) ||
> + sctx->is_debug ||
> + si_can_dump_shader(&sscreen->b, sel->info.processor);
> +
> + if (wait) {
> + u_async_debug_init(&async_debug);
> + sel->compiler_ctx_state.debug = async_debug.base;
> + }
> +
> + util_queue_add_job(&sscreen->shader_compiler_queue, sel,
> + &sel->ready, si_init_shader_selector_async,
> + NULL);
> +
> + if (wait) {
> + util_queue_fence_wait(&sel->ready);
> + u_async_debug_drain(&async_debug, &sctx->debug);
> + u_async_debug_cleanup(&async_debug);
> + }
>
> return sel;
> }
>
> static void si_update_streamout_state(struct si_context *sctx)
> {
> struct si_shader_selector *shader_with_so = si_get_vs(sctx)->cso;
>
> if (!shader_with_so)
> return;
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> More information about the mesa-dev mailing list