[Mesa-dev] [PATCH 31/32] st/mesa: implement a tgsi on-disk shader cache

Nicolai Hähnle nhaehnle at gmail.com
Thu Feb 16 14:22:31 UTC 2017


On 14.02.2017 01:52, Timothy Arceri wrote:
> Implements a tgsi cache for the OpenGL state tracker.
>
> V2: add support for compute shaders

A few high-level points:

I think it would be nice to have the reading and writing functions in 
the same file, as in the GLSL case. It makes the structure of the code 
easier to follow.

The TGSI reading needs real error handling. As far as I can see, if the 
cache happens to lose one of the TGSI blobs (for whatever reason), things 
will silently break in weird ways.

I also don't like that the cache SHA is calculated separately in two 
different places. Wouldn't it be possible to take the same approach as 
in GLSL, where the SHA is computed in one place, and then a different 
path is taken depending on whether the object is found in the cache or not?

One minor comment below:


> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 222 +++++++++++++++++++++++++++++
>  src/mesa/state_tracker/st_program.c        | 133 ++++++++++++++++-
>  2 files changed, 350 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 630f5af..b485776 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -32,6 +32,7 @@
>
>  #include "st_glsl_to_tgsi.h"
>
> +#include "compiler/glsl/blob.h"
>  #include "compiler/glsl/glsl_parser_extras.h"
>  #include "compiler/glsl/ir_optimization.h"
>  #include "compiler/glsl/program.h"
> @@ -47,6 +48,8 @@
>  #include "pipe/p_screen.h"
>  #include "tgsi/tgsi_ureg.h"
>  #include "tgsi/tgsi_info.h"
> +#include "util/disk_cache.h"
> +#include "util/mesa-sha1.h"
>  #include "util/u_math.h"
>  #include "util/u_memory.h"
>  #include "st_program.h"
> @@ -6999,6 +7002,219 @@ has_unsupported_control_flow(exec_list *ir,
>     return visitor.unsupported;
>  }
>
> +static void
> +read_stream_out_from_cache(struct blob_reader *blob_reader,
> +                           struct pipe_shader_state *tgsi)
> +{
> +   blob_copy_bytes(blob_reader, (uint8_t *) &tgsi->stream_output,
> +                    sizeof(tgsi->stream_output));
> +}
> +
> +static void
> +read_tgsi_from_cache(struct blob_reader *blob_reader,
> +                     const struct tgsi_token **tokens)
> +{
> +   uint32_t num_tokens  = blob_read_uint32(blob_reader);
> +   unsigned tokens_size = num_tokens * sizeof(struct tgsi_token);
> +   *tokens = (const tgsi_token*) MALLOC(tokens_size);
> +   blob_copy_bytes(blob_reader, (uint8_t *) *tokens, tokens_size);
> +}
> +
> +static void
> +load_tgsi_from_disk_cache(struct gl_context *ctx,
> +                          struct gl_shader_program *prog)
> +{
> +   unsigned char sha1[20];
> +   char sha1_buf[41];
> +   struct st_context *st = st_context(ctx);
> +
> +   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
> +      if (prog->_LinkedShaders[i] == NULL)
> +         continue;
> +
> +      char *buf = ralloc_strdup(NULL, "tsgi_tokens ");

Typo: "tsgi" should be "tgsi" (same below)

Cheers,
Nicolai


> +      _mesa_sha1_format(sha1_buf,
> +                        prog->_LinkedShaders[i]->Program->sh.data->sha1);
> +      ralloc_strcat(&buf, sha1_buf);
> +
> +      struct gl_program *glprog = prog->_LinkedShaders[i]->Program;
> +      switch (glprog->info.stage) {
> +      case MESA_SHADER_VERTEX:
> +         ralloc_strcat(&buf, " vs");
> +         _mesa_sha1_compute(buf, strlen(buf), sha1);
> +         break;
> +      case MESA_SHADER_TESS_EVAL:
> +         ralloc_strcat(&buf, " tes");
> +         _mesa_sha1_compute(buf, strlen(buf), sha1);
> +         break;
> +      case MESA_SHADER_TESS_CTRL:
> +         ralloc_strcat(&buf, " tcs");
> +         _mesa_sha1_compute(buf, strlen(buf), sha1);
> +         break;
> +      case MESA_SHADER_GEOMETRY:
> +         ralloc_strcat(&buf, " gs");
> +         _mesa_sha1_compute(buf, strlen(buf), sha1);
> +         break;
> +      case MESA_SHADER_FRAGMENT:
> +         ralloc_strcat(&buf, " fs");
> +         _mesa_sha1_compute(buf, strlen(buf), sha1);
> +         break;
> +      case MESA_SHADER_COMPUTE:
> +         ralloc_strcat(&buf, " cs");
> +         _mesa_sha1_compute(buf, strlen(buf), sha1);
> +         break;
> +
> +      default:
> +         unreachable("Unsupported stage");
> +      }
> +
> +      size_t size;
> +      uint8_t *buffer = (uint8_t *) disk_cache_get(ctx->Cache, sha1, &size);
> +      if (buffer) {
> +         struct blob_reader blob_reader;
> +         blob_reader_init(&blob_reader, buffer, size);
> +
> +         switch (glprog->info.stage) {
> +         case MESA_SHADER_VERTEX: {
> +            struct st_vertex_program *stvp =
> +               (struct st_vertex_program *) glprog;
> +
> +            st_release_vp_variants(st, stvp);
> +
> +            stvp->num_inputs = blob_read_uint32(&blob_reader);
> +            blob_copy_bytes(&blob_reader, (uint8_t *) stvp->index_to_input,
> +                            sizeof(stvp->index_to_input));
> +            blob_copy_bytes(&blob_reader, (uint8_t *) stvp->result_to_output,
> +                            sizeof(stvp->result_to_output));
> +
> +            read_stream_out_from_cache(&blob_reader, &stvp->tgsi);
> +            read_tgsi_from_cache(&blob_reader, &stvp->tgsi.tokens);
> +
> +            if (st->vp == stvp)
> +               st->dirty |= ST_NEW_VERTEX_PROGRAM(st, stvp);
> +
> +            break;
> +         }
> +         case MESA_SHADER_TESS_EVAL: {
> +            struct st_tesseval_program *sttep =
> +               (struct st_tesseval_program *) glprog;
> +
> +            st_release_basic_variants(st, sttep->Base.Target,
> +                                      &sttep->variants, &sttep->tgsi);
> +
> +            read_stream_out_from_cache(&blob_reader, &sttep->tgsi);
> +            read_tgsi_from_cache(&blob_reader, &sttep->tgsi.tokens);
> +
> +            if (st->tep == sttep)
> +               st->dirty |= sttep->affected_states;
> +
> +            break;
> +         }
> +         case MESA_SHADER_TESS_CTRL: {
> +            struct st_tessctrl_program *sttcp =
> +               (struct st_tessctrl_program *) glprog;
> +
> +            st_release_basic_variants(st, sttcp->Base.Target,
> +                                      &sttcp->variants, &sttcp->tgsi);
> +
> +            read_stream_out_from_cache(&blob_reader, &sttcp->tgsi);
> +            read_tgsi_from_cache(&blob_reader, &sttcp->tgsi.tokens);
> +
> +            if (st->tcp == sttcp)
> +               st->dirty |= sttcp->affected_states;
> +
> +            break;
> +         }
> +         case MESA_SHADER_GEOMETRY: {
> +            struct st_geometry_program *stgp =
> +               (struct st_geometry_program *) glprog;
> +
> +            st_release_basic_variants(st, stgp->Base.Target, &stgp->variants,
> +                                      &stgp->tgsi);
> +
> +            read_stream_out_from_cache(&blob_reader, &stgp->tgsi);
> +            read_tgsi_from_cache(&blob_reader, &stgp->tgsi.tokens);
> +
> +            if (st->gp == stgp)
> +               st->dirty |= stgp->affected_states;
> +
> +            break;
> +         }
> +         case MESA_SHADER_FRAGMENT: {
> +            struct st_fragment_program *stfp =
> +               (struct st_fragment_program *) glprog;
> +
> +            st_release_fp_variants(st, stfp);
> +
> +            read_tgsi_from_cache(&blob_reader, &stfp->tgsi.tokens);
> +
> +            if (st->fp == stfp)
> +               st->dirty |= stfp->affected_states;
> +
> +            break;
> +         }
> +         case MESA_SHADER_COMPUTE: {
> +            struct st_compute_program *stcp =
> +               (struct st_compute_program *) glprog;
> +
> +            st_release_cp_variants(st, stcp);
> +
> +            read_tgsi_from_cache(&blob_reader,
> +                                 (const tgsi_token**) &stcp->tgsi.prog);
> +
> +            stcp->tgsi.req_local_mem = stcp->Base.info.cs.shared_size;
> +            stcp->tgsi.req_private_mem = 0;
> +            stcp->tgsi.req_input_mem = 0;
> +
> +            if (st->cp == stcp)
> +                st->dirty |= stcp->affected_states;
> +
> +            break;
> +         }
> +         default:
> +            unreachable("Unsupported stage");
> +         }
> +
> +         if (blob_reader.current != blob_reader.end || blob_reader.overrun) {
> +            /* Something very bad has gone wrong discard the item from the
> +             * cache and rebuild/link from source.
> +             */
> +            assert(!"Invalid TGSI shader disk cache item!");
> +
> +            if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
> +               fprintf(stderr, "Error reading program from cache (invalid "
> +                       "TGSI cache item)\n");
> +            }
> +
> +            disk_cache_remove(ctx->Cache, sha1);
> +            for (unsigned i = 0; i < prog->NumShaders; i++) {
> +               _mesa_glsl_compile_shader(ctx, prog->Shaders[i], false, false,
> +                                         true);
> +            }
> +
> +            prog->data->cache_fallback = true;
> +            _mesa_glsl_link_shader(ctx, prog);
> +
> +            free(buf);
> +            return;
> +         }
> +
> +         if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
> +            _mesa_sha1_format(sha1_buf, sha1);
> +            fprintf(stderr, "%s tgsi_tokens retrieved from cache: %s\n",
> +                    _mesa_shader_stage_to_string(i), sha1_buf);
> +         }
> +
> +         set_prog_affected_state_flags(glprog);
> +         _mesa_associate_uniform_storage(ctx, prog, glprog->Parameters);
> +
> +         free(buffer);
> +      }
> +
> +      ralloc_free(buf);
> +   }
> +}
> +
>  extern "C" {
>
>  /**
> @@ -7010,6 +7226,12 @@ extern "C" {
>  GLboolean
>  st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
>  {
> +   /* Return early if we are loading the shader from on-disk cache */
> +   if (ctx->Cache && prog->data->LinkStatus == linking_skipped) {
> +      load_tgsi_from_disk_cache(ctx, prog);
> +      return GL_TRUE;
> +   }
> +
>     struct pipe_screen *pscreen = ctx->st->pipe->screen;
>     assert(prog->data->LinkStatus);
>
> diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
> index b2d1586..c85055e 100644
> --- a/src/mesa/state_tracker/st_program.c
> +++ b/src/mesa/state_tracker/st_program.c
> @@ -37,7 +37,10 @@
>  #include "program/prog_parameter.h"
>  #include "program/prog_print.h"
>  #include "program/programopt.h"
> +#include "util/disk_cache.h"
> +#include "util/mesa-sha1.h"
>
> +#include "compiler/glsl/blob.h"
>  #include "compiler/nir/nir.h"
>
>  #include "pipe/p_context.h"
> @@ -223,6 +226,111 @@ st_release_cp_variants(struct st_context *st, struct st_compute_program *stcp)
>     }
>  }
>
> +static void
> +write_stream_out_to_cache(struct blob *blob,
> +                          struct pipe_shader_state *tgsi)
> +{
> +   blob_write_bytes(blob, &tgsi->stream_output,
> +                    sizeof(tgsi->stream_output));
> +}
> +
> +static void
> +write_tgsi_to_cache(struct blob *blob, struct pipe_shader_state *tgsi,
> +                    struct st_context *st, unsigned char *sha1,
> +                    unsigned num_tokens)
> +{
> +   blob_write_uint32(blob, num_tokens);
> +   blob_write_bytes(blob, tgsi->tokens,
> +                    num_tokens * sizeof(struct tgsi_token));
> +
> +   disk_cache_put(st->ctx->Cache, sha1, blob->data, blob->size);
> +}
> +
> +/**
> + * Store tgsi and any other required state in on-disk shader cache.
> + */
> +static void
> +cache_tgsi(struct st_context *st, struct gl_program *prog,
> +           struct pipe_shader_state *out_state, unsigned num_tokens)
> +{
> +   if (!st->ctx->Cache)
> +      return;
> +
> +   /* Exit early when we are dealing with a ff shader with no source file to
> +    * generate a source from.
> +    */
> +   if (*prog->sh.data->sha1 == 0)
> +      return;
> +
> +   unsigned char sha1[20];
> +   char sha1_buf[41];
> +   char *buf = ralloc_strdup(NULL, "tsgi_tokens ");
> +   struct blob *blob = blob_create(NULL);
> +
> +   _mesa_sha1_format(sha1_buf, prog->sh.data->sha1);
> +   ralloc_strcat(&buf, sha1_buf);
> +
> +   switch (prog->info.stage) {
> +   case MESA_SHADER_VERTEX: {
> +      ralloc_strcat(&buf, " vs");
> +     _mesa_sha1_compute(buf, strlen(buf), sha1);
> +
> +      struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
> +
> +      blob_write_uint32(blob, stvp->num_inputs);
> +      blob_write_bytes(blob, stvp->index_to_input,
> +                       sizeof(stvp->index_to_input));
> +      blob_write_bytes(blob, stvp->result_to_output,
> +                       sizeof(stvp->result_to_output));
> +
> +      write_stream_out_to_cache(blob, &stvp->tgsi);
> +      write_tgsi_to_cache(blob, &stvp->tgsi, st, sha1, num_tokens);
> +      break;
> +   }
> +   case MESA_SHADER_TESS_EVAL:
> +      ralloc_strcat(&buf, " tes");
> +      _mesa_sha1_compute(buf, strlen(buf), sha1);
> +      write_stream_out_to_cache(blob, out_state);
> +      write_tgsi_to_cache(blob, out_state, st, sha1, num_tokens);
> +      break;
> +   case MESA_SHADER_TESS_CTRL:
> +      ralloc_strcat(&buf, " tcs");
> +      _mesa_sha1_compute(buf, strlen(buf), sha1);
> +      write_stream_out_to_cache(blob, out_state);
> +      write_tgsi_to_cache(blob, out_state, st, sha1, num_tokens);
> +      break;
> +   case MESA_SHADER_GEOMETRY:
> +      ralloc_strcat(&buf, " gs");
> +      _mesa_sha1_compute(buf, strlen(buf), sha1);
> +      write_stream_out_to_cache(blob, out_state);
> +      write_tgsi_to_cache(blob, out_state, st, sha1, num_tokens);
> +      break;
> +   case MESA_SHADER_FRAGMENT: {
> +      ralloc_strcat(&buf, " fs");
> +      _mesa_sha1_compute(buf, strlen(buf), sha1);
> +
> +      struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
> +      write_tgsi_to_cache(blob, &stfp->tgsi, st, sha1, num_tokens);
> +      break;
> +   }
> +   case MESA_SHADER_COMPUTE:
> +      ralloc_strcat(&buf, " cs");
> +      _mesa_sha1_compute(buf, strlen(buf), sha1);
> +      write_tgsi_to_cache(blob, out_state, st, sha1, num_tokens);
> +      break;
> +   default:
> +      unreachable("Unsupported stage");
> +   }
> +
> +   if (st->ctx->_Shader->Flags & GLSL_CACHE_INFO) {
> +      _mesa_sha1_format(sha1_buf, sha1);
> +      fprintf(stderr, "putting %s tgsi_tokens in cache: %s\n",
> +              _mesa_shader_stage_to_string(prog->info.stage), sha1_buf);
> +   }
> +
> +   ralloc_free(blob);
> +   ralloc_free(buf);
> +}
>
>  /**
>   * Translate a vertex program.
> @@ -442,7 +550,6 @@ st_translate_vertex_program(struct st_context *st,
>                                        &stvp->tgsi.stream_output);
>
>        free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
> -      stvp->glsl_to_tgsi = NULL;
>     } else
>        error = st_translate_mesa_program(st->ctx,
>                                          PIPE_SHADER_VERTEX,
> @@ -467,8 +574,15 @@ st_translate_vertex_program(struct st_context *st,
>        return false;
>     }
>
> -   stvp->tgsi.tokens = ureg_get_tokens(ureg, NULL);
> +   unsigned num_tokens;
> +   stvp->tgsi.tokens = ureg_get_tokens(ureg, &num_tokens);
>     ureg_destroy(ureg);
> +
> +   if (stvp->glsl_to_tgsi) {
> +      stvp->glsl_to_tgsi = NULL;
> +      cache_tgsi(st, &stvp->Base, NULL, num_tokens);
> +   }
> +
>     return stvp->tgsi.tokens != NULL;
>  }
>
> @@ -890,7 +1004,6 @@ st_translate_fragment_program(struct st_context *st,
>                             fs_output_semantic_index);
>
>        free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
> -      stfp->glsl_to_tgsi = NULL;
>     } else if (stfp->ati_fs)
>        st_translate_atifs_program(ureg,
>                                   stfp->ati_fs,
> @@ -923,8 +1036,15 @@ st_translate_fragment_program(struct st_context *st,
>                                  fs_output_semantic_name,
>                                  fs_output_semantic_index);
>
> -   stfp->tgsi.tokens = ureg_get_tokens(ureg, NULL);
> +   unsigned num_tokens;
> +   stfp->tgsi.tokens = ureg_get_tokens(ureg, &num_tokens);
>     ureg_destroy(ureg);
> +
> +   if (stfp->glsl_to_tgsi) {
> +      stfp->glsl_to_tgsi = NULL;
> +      cache_tgsi(st, &stfp->Base, NULL, num_tokens);
> +   }
> +
>     return stfp->tgsi.tokens != NULL;
>  }
>
> @@ -1459,13 +1579,16 @@ st_translate_program_common(struct st_context *st,
>                          output_semantic_name,
>                          output_semantic_index);
>
> -   out_state->tokens = ureg_get_tokens(ureg, NULL);
> +   unsigned num_tokens;
> +   out_state->tokens = ureg_get_tokens(ureg, &num_tokens);
>     ureg_destroy(ureg);
>
>     st_translate_stream_output_info(glsl_to_tgsi,
>                                     outputMapping,
>                                     &out_state->stream_output);
>
> +   cache_tgsi(st, prog, out_state, num_tokens);
> +
>     if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) {
>        _mesa_print_program(prog);
>        debug_printf("\n");
>



More information about the mesa-dev mailing list