[Mesa-dev] [PATCH 06/27] i965: add initial implementation of on disk shader cache

Jason Ekstrand jason at jlekstrand.net
Sat Aug 19 16:15:27 UTC 2017


On August 19, 2017 12:45:09 AM Jordan Justen <jordan.l.justen at intel.com> wrote:

> From: Timothy Arceri <timothy.arceri at collabora.com>
>
> This uses the recently-added disk_cache.c to write out the final
> linked binary for vertex and fragment shader programs.
>
> This is based off the initial implementation done by Carl Worth.
>
> [jordan.l.justen at intel.com: *_cached_program => brw_disk_cache_*_program]
> [jordan.l.justen at intel.com: brw_shader_cache.c => brw_disk_cache.c]
> [jordan.l.justen at intel.com: don't map to write program when LLC is present]
> [jordan.l.justen at intel.com: set program_written_to_cache on read from cache]
> [jordan.l.justen at intel.com: only try cache when status is linking_skipped]
> Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
> ---
>  src/mesa/drivers/dri/i965/Makefile.sources |   1 +
>  src/mesa/drivers/dri/i965/brw_disk_cache.c | 395 +++++++++++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_state.h      |   5 +
>  3 files changed, 401 insertions(+)
>  create mode 100644 src/mesa/drivers/dri/i965/brw_disk_cache.c
>
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
> b/src/mesa/drivers/dri/i965/Makefile.sources
> index 425c883de8..6e21010bae 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -14,6 +14,7 @@ i965_FILES = \
>  	brw_cs.h \
>  	brw_curbe.c \
>  	brw_defines.h \
> +	brw_disk_cache.c \
>  	brw_draw.c \
>  	brw_draw.h \
>  	brw_draw_upload.c \
> diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c 
> b/src/mesa/drivers/dri/i965/brw_disk_cache.c
> new file mode 100644
> index 0000000000..b56e561e14
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
> @@ -0,0 +1,395 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "compiler/glsl/blob.h"
> +#include "compiler/glsl/ir_uniform.h"
> +#include "compiler/glsl/shader_cache.h"
> +#include "main/mtypes.h"
> +#include "util/disk_cache.h"
> +#include "util/macros.h"
> +#include "util/mesa-sha1.h"
> +
> +#include "brw_context.h"
> +#include "brw_state.h"
> +#include "brw_vs.h"
> +#include "brw_wm.h"
> +
> +static uint64_t
> +ptr_to_uint64_t(void *ptr)
> +{
> +   uint64_t ptr_int = (uint64_t) ptr;
> +#if __i386__
> +   ptr_int &= 0xFFFFFFFF;
> +#endif
> +   return ptr_int;
> +}
> +
> +static size_t
> +key_size(gl_shader_stage stage)
> +{
> +   switch (stage) {
> +   case MESA_SHADER_VERTEX:
> +      return sizeof(struct brw_vs_prog_key);
> +   case MESA_SHADER_TESS_CTRL:
> +      return sizeof(struct brw_tcs_prog_key);
> +   case MESA_SHADER_TESS_EVAL:
> +      return sizeof(struct brw_tes_prog_key);
> +   case MESA_SHADER_GEOMETRY:
> +      return sizeof(struct brw_gs_prog_key);
> +   case MESA_SHADER_FRAGMENT:
> +      return sizeof(struct brw_wm_prog_key);
> +   case MESA_SHADER_COMPUTE:
> +      return sizeof(struct brw_cs_prog_key);
> +   default:
> +      unreachable("Unsupported stage!");
> +   }
> +}
> +
> +static void
> +gen_shader_sha1(struct brw_context *brw, struct gl_program *prog,
> +                gl_shader_stage stage, void *key, unsigned char *out_sha1)
> +{
> +   char sha1_buf[41];
> +   unsigned char sha1[20];
> +   char manifest[256];
> +   int offset = 0;
> +
> +   _mesa_sha1_format(sha1_buf, prog->sh.data->sha1);
> +   offset += snprintf(manifest, sizeof(manifest), "program: %s\n", sha1_buf);
> +
> +   _mesa_sha1_compute(key, key_size(stage), sha1);
> +   _mesa_sha1_format(sha1_buf, sha1);
> +   offset += snprintf(manifest + offset, sizeof(manifest) - offset,
> +                      "%s_key: %s\n", _mesa_shader_stage_to_abbrev(stage),
> +                      sha1_buf);
> +
> +   _mesa_sha1_compute(manifest, strlen(manifest), out_sha1);
> +}
> +
> +static void
> +load_program_data(struct gl_program *glprog, struct blob_reader *binary,

Calling one "load" and the other "write" is weird.  Can we go with either 
read/write or load/store?

> +                  struct brw_stage_prog_data *prog_data,
> +                  gl_shader_stage stage, struct gl_context *ctx)
> +{
> +   static const gl_constant_value zero = { 0 };
> +
> +   uint64_t parameter_values_base = blob_read_uint64(binary);
> +   uint64_t uniform_data_slots_base = blob_read_uint64(binary);
> +
> +   uint32_t nr_params = blob_read_uint32(binary);
> +   assert(nr_params == prog_data->nr_params);
> +
> +   prog_data->param = rzalloc_array(NULL, const gl_constant_value *,
> +                                    nr_params);
> +   if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
> +      fprintf(stderr, "Allocating %d prog_data->params (%p)\n",
> +              prog_data->nr_params, prog_data->param);
> +   }
> +
> +   for (unsigned i = 0; i < nr_params; i++) {
> +      uint64_t param = blob_read_uint64(binary);
> +      ptrdiff_t p_offset, u_offset;

These need to be explicitly 64 bits or you risk weird overflow problems 
when computing the reconstruction.  (A shader may get cached by a 64-bit 
application and read by a 32-bit version of the same app.)  Also, if you 
make them unsigned, you can just drop the >= 0 check.

> +      struct gl_program_parameter_list *param_list = glprog->Parameters;
> +
> +      p_offset = (param - parameter_values_base) / sizeof(gl_constant_value);
> +      u_offset = (param - uniform_data_slots_base) / 
> sizeof(gl_constant_value);
> +
> +      if (p_offset >= 0 && p_offset < 4 * param_list->NumParameters) {
> +         prog_data->param[i] =
> +            ((gl_constant_value *) param_list->ParameterValues) + p_offset;
> +      } else if (u_offset >= 0 &&
> +                 u_offset < glprog->sh.data->NumUniformDataSlots) {
> +         prog_data->param[i] = glprog->sh.data->UniformDataSlots + u_offset;
> +      } else {
> +         prog_data->param[i] = &zero;

Should this be an assert?

> +      }
> +   }
> +
> +   uint32_t nr_pull_params = blob_read_uint32(binary);
> +   assert(nr_pull_params == prog_data->nr_pull_params);
> +
> +   prog_data->pull_param = rzalloc_array(NULL, const gl_constant_value *,
> +                                         nr_pull_params);
> +
> +   for (unsigned i = 0; i < nr_pull_params; i++) {
> +      /* FIXME: We need to fixup pull_params pointers here. */

This too.  In fact, it would probably be better to write read_param_array 
and write_param_array helpers and avoid the duplication.  Anything that can 
end up in one can end up in the other.

> +   }
> +}
> +
> +#define SET_UPLOAD_PRAMS(sh, sh_caps, prog)                          \
> +      assert(prog_data_size == sizeof(struct brw_##sh##_prog_data)); \
> +      sh##_key.program_string_id = prog->id;                         \
> +      cache_id = BRW_CACHE_##sh_caps##_PROG;                         \
> +      key = &sh##_key;                                               \
> +      max_threads = devinfo->max_##sh##_threads;                     \
> +      stage_state = &brw->sh.base;                                   \
> +
> +static bool
> +read_and_upload(struct brw_context *brw, struct disk_cache *cache,
> +                struct blob_reader *binary, struct gl_program *prog,
> +                gl_shader_stage stage)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +
> +   unsigned char binary_sha1[20];
> +
> +   struct brw_wm_prog_key wm_key;
> +   struct brw_vs_prog_key vs_key;
> +
> +   switch (stage) {
> +   case MESA_SHADER_VERTEX:
> +      brw_vs_populate_key(brw, &vs_key);
> +      /* We don't care what instance of the program it is we only care if
> +       * its the correct binary to load so ignore program id for on disk 
> cache.
> +       */
> +      vs_key.program_string_id = 0;
> +      gen_shader_sha1(brw, prog, stage, &vs_key, binary_sha1);
> +      break;
> +   case MESA_SHADER_FRAGMENT:
> +      brw_wm_populate_key(brw, &wm_key);
> +      wm_key.program_string_id = 0;
> +      gen_shader_sha1(brw, prog, stage, &wm_key, binary_sha1);
> +      break;
> +   default:
> +      unreachable("Unsupported stage!");
> +   }
> +
> +   size_t size;
> +   uint8_t *buffer = disk_cache_get(cache, binary_sha1, &size);
> +   if (buffer == NULL) {
> +      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
> +         char sha1_buf[41];
> +         _mesa_sha1_format(sha1_buf, binary_sha1);
> +         fprintf(stderr, "No cached %s binary found for: %s\n",
> +                 _mesa_shader_stage_to_abbrev(stage), sha1_buf);
> +      }
> +      return false;
> +   }
> +
> +   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
> +      char sha1_buf[41];
> +      _mesa_sha1_format(sha1_buf, binary_sha1);
> +      fprintf(stderr, "attempting to populate bo cache with binary: %s\n",
> +              sha1_buf);
> +   }
> +
> +   blob_reader_init(binary, buffer, size);
> +
> +   /* Read shader program from blob. */
> +   size_t program_size = blob_read_uint32(binary);
> +   uint8_t *program = blob_read_bytes(binary, program_size);
> +
> +   /* Read shader program_data from blob. */
> +   size_t prog_data_size = blob_read_uint32(binary);
> +   struct brw_stage_prog_data *prog_data =
> +      blob_read_bytes(binary, prog_data_size);
> +
> +   /* Upload params set by SET_UPLOAD_PRAMS() */
> +   struct brw_stage_state *stage_state;
> +   enum brw_cache_id cache_id;
> +   unsigned max_threads;
> +   void *key;
> +
> +   switch (stage) {
> +   case MESA_SHADER_VERTEX: {
> +      struct brw_program *vp = (struct brw_program *) prog;
> +      SET_UPLOAD_PRAMS(vs, VS, vp)
> +      break;
> +   }
> +   case MESA_SHADER_FRAGMENT: {
> +      struct brw_program *wp = (struct brw_program *) prog;
> +      SET_UPLOAD_PRAMS(wm, FS, wp)
> +      break;
> +   }
> +   default:
> +      unreachable("Unsupported stage!");
> +   }
> +
> +   load_program_data(prog, binary, prog_data, stage, &brw->ctx);
> +
> +   if (binary->current != binary->end || binary->overrun) {
> +      /* Something very bad has gone wrong discard the item from the cache and
> +       * rebuild from source.
> +       */
> +      assert(!"Invalid i965 shader disk cache item!");
> +
> +      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
> +         fprintf(stderr, "Error reading program from cache (invalid i965 "
> +                 "cache item)\n");
> +      }
> +
> +      disk_cache_remove(cache, binary_sha1);
> +      free(buffer);
> +      return false;
> +   }
> +
> +   brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch,
> +                           max_threads);
> +
> +   brw_upload_cache(&brw->cache, cache_id, key, key_size(stage), program,
> +                    program_size, prog_data, prog_data_size,
> +                    &stage_state->prog_offset, &stage_state->prog_data);
> +
> +   prog->program_written_to_cache = true;
> +
> +   free(buffer);
> +
> +   return true;
> +}
> +
> +bool
> +brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage)
> +{
> +   struct blob_reader binary;
> +
> +   struct disk_cache *cache = brw->ctx.Cache;
> +   if (cache == NULL)
> +      return false;
> +
> +   struct gl_program *prog = brw->ctx._Shader->CurrentProgram[stage];
> +   if (prog == NULL)
> +      return false;
> +
> +   if (prog->sh.data->LinkStatus != linking_skipped)
> +      goto FAIL;
> +
> +   if (!read_and_upload(brw, cache, &binary, prog, stage))
> +      goto FAIL;
> +
> +   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
> +      fprintf(stderr, "%s: Successfully read every byte written!\n",
> +              __FUNCTION__);
> +   }
> +
> +   return true;
> +
> +FAIL:
> +   /*FIXME: Fall back and compile from source here. */
> +   return false;
> +}
> +
> +static void
> +write_program_data(struct brw_context *brw, struct gl_program *prog,
> +                   void *key, struct brw_stage_prog_data *prog_data,
> +                   size_t program_size, size_t prog_data_size,
> +                   uint32_t prog_offset, struct disk_cache *cache,
> +                   gl_shader_stage stage)
> +{
> +   unsigned char sha1[20];
> +   char buf[41];
> +
> +   struct blob *binary = blob_create();
> +   if (!binary)
> +      return;
> +
> +   gen_shader_sha1(brw, prog, stage, key, sha1);
> +
> +   /* Write program to blob. */
> +   blob_write_uint32(binary, program_size);
> +
> +   uint8_t *blob_cursor = blob_reserve_bytes(binary, program_size);
> +
> +   /* Copy program binary */
> +   if (brw->has_llc) {
> +      memcpy(blob_cursor, brw->cache.map + prog_offset, program_size);
> +   } else {
> +      void *map = brw_bo_map(brw, brw->cache.bo, MAP_READ);
> +      if (unlikely(!map)) {
> +         _mesa_error_no_memory(__func__);
> +         return;
> +      }
> +      memcpy(blob_cursor, map + prog_offset, program_size);
> +      brw_bo_unmap(brw->cache.bo);
> +   }
> +
> +   /* Write program_data to blob. */
> +   blob_write_uint32(binary, prog_data_size);
> +   blob_write_bytes(binary, prog_data, prog_data_size);
> +
> +   /* Include variable-length params from end of brw_stage_prog_data as well.
> +    *
> +    * Before writing either of the params or pull_params arrays, we first
> +    * write out the addresses of the ParameterValues and UniformDataSlots
> +    * storage. The pointers within params will be pointers to within one of
> +    * these blocks of storage. So we can use the addresses of this storage
> +    * together with the pointer values to correctly construct pointers to the
> +    * actual storage when the program data is loaded from the cache.
> +    */

This is a very clever way to solve the problem.  However, it makes me a bit 
uncomfortable that we store actual pointers.  Why not put the 
pointer->offset logic on this end and store each as a 32-bit value that 
specifies some sort of "domain" such as PARAMETER or UNIFORM followed 
optionally by a 32-bit index into that domain?  Then the load function 
would look like:

domain = read_uint32();
switch (domain) {
case UNIFORM:
   idx = read_uint32();
   param[i] = &prog->sh.data->UniformDataSlots[idx];
   break;
   ...
}

That would make things far more explicit.  It would also give you the 
chance to bail on filling out the cache entry whenever you see something 
you don't recognize instead of blindly copying it in and hoping the load 
function does something sensible with every entry.

> +   blob_write_uint64(binary,
> +                     ptr_to_uint64_t(prog->Parameters->ParameterValues));
> +
> +   blob_write_uint64(binary, 
> ptr_to_uint64_t(prog->sh.data->UniformDataSlots));
> +
> +   blob_write_uint32(binary, prog_data->nr_params);
> +
> +   for (unsigned i = 0; i < prog_data->nr_params; i++) {
> +      blob_write_uint64(binary, ptr_to_uint64_t((void *) 
> prog_data->param[i]));
> +   }
> +
> +   blob_write_uint32(binary, prog_data->nr_pull_params);
> +
> +   _mesa_sha1_format(buf, sha1);
> +   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
> +      fprintf(stderr, "putting binary in cache: %s\n", buf);
> +   }
> +
> +   disk_cache_put(cache, sha1, binary->data, binary->size);
> +
> +   prog->program_written_to_cache = true;
> +   free(binary);
> +}
> +
> +void
> +brw_disk_cache_write_program(struct brw_context *brw)
> +{
> +   struct disk_cache *cache = brw->ctx.Cache;
> +   if (cache == NULL)
> +      return;
> +
> +   struct gl_program *prog =
> +      brw->ctx._Shader->CurrentProgram[MESA_SHADER_VERTEX];
> +   if (prog && !prog->program_written_to_cache) {
> +      struct brw_vs_prog_key vs_key;
> +      brw_vs_populate_key(brw, &vs_key);
> +      vs_key.program_string_id = 0;
> +
> +      write_program_data(brw, prog, &vs_key, brw->vs.base.prog_data,
> +                         brw->vs.base.prog_data->program_size,
> +                         sizeof(struct brw_vs_prog_data),
> +                         brw->vs.base.prog_offset, cache,
> +                         MESA_SHADER_VERTEX);
> +   }
> +
> +   prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
> +   if (prog && !prog->program_written_to_cache) {
> +      struct brw_wm_prog_key wm_key;
> +      brw_wm_populate_key(brw, &wm_key);
> +      wm_key.program_string_id = 0;
> +
> +      write_program_data(brw, prog, &wm_key, brw->wm.base.prog_data,
> +                         brw->wm.base.prog_data->program_size,
> +                         sizeof(struct brw_wm_prog_data),
> +                         brw->wm.base.prog_offset, cache,
> +                         MESA_SHADER_FRAGMENT);
> +   }
> +}
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
> b/src/mesa/drivers/dri/i965/brw_state.h
> index 46665aae12..8b468c752c 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -131,6 +131,11 @@ void brw_upload_state_base_address(struct brw_context 
> *brw);
>  void gen8_write_pma_stall_bits(struct brw_context *brw,
>                                 uint32_t pma_stall_bits);
>
> +/* brw_disk_cache.c */
> +bool brw_disk_cache_upload_program(struct brw_context *brw,
> +                                   gl_shader_stage stage);
> +void brw_disk_cache_write_program(struct brw_context *brw);
> +
>  /***********************************************************************
>   * brw_state.c
>   */
> --
> 2.14.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev




More information about the mesa-dev mailing list