[Mesa-dev] [PATCH v2 18/32] i965: add initial implementation of on disk shader cache

Jason Ekstrand jason at jlekstrand.net
Fri Oct 20 23:30:07 UTC 2017


On Fri, Oct 20, 2017 at 4:05 PM, Jason Ekstrand <jason at jlekstrand.net>
wrote:

> On Wed, Oct 18, 2017 at 10:32 PM, Jordan Justen <jordan.l.justen at intel.com
> > wrote:
>
>> From: Timothy Arceri <timothy.arceri at collabora.com>
>>
>> This uses the recently-added disk_cache.c to write out the final
>> linked binary for vertex and fragment shader programs.
>>
>> This is based off the initial implementation done by Carl Worth.
>>
>> v2:
>>  * Squash 'i965: add image param shader cache support'
>>  * Squash 'i965: add shader cache support for pull param pointers'
>>  * Sustantially simplified by a rework on top of Jason's 2975e4c56a7a.
>>  * Rename load_program_data to read_program_data. (Jason)
>>
>> [jordan.l.justen at intel.com: *_cached_program => brw_disk_cache_*_program]
>> [jordan.l.justen at intel.com: brw_shader_cache.c => brw_disk_cache.c]
>> [jordan.l.justen at intel.com: don't map to write program when LLC is
>> present]
>> [jordan.l.justen at intel.com: set program_written_to_cache on read from
>> cache]
>> [jordan.l.justen at intel.com: only try cache when status is
>> linking_skipped]
>> [jordan.l.justen at intel.com: rework based on uniforms rework 2975e4c56a7a]
>> Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
>> ---
>>  src/mesa/drivers/dri/i965/Makefile.sources |   1 +
>>  src/mesa/drivers/dri/i965/brw_disk_cache.c | 357
>> +++++++++++++++++++++++++++++
>>  src/mesa/drivers/dri/i965/brw_state.h      |   5 +
>>  src/mesa/drivers/dri/i965/meson.build      |   1 +
>>  4 files changed, 364 insertions(+)
>>  create mode 100644 src/mesa/drivers/dri/i965/brw_disk_cache.c
>>
>> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources
>> b/src/mesa/drivers/dri/i965/Makefile.sources
>> index 053d89b81e..2980cdb3c5 100644
>> --- a/src/mesa/drivers/dri/i965/Makefile.sources
>> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
>> @@ -14,6 +14,7 @@ i965_FILES = \
>>         brw_cs.h \
>>         brw_curbe.c \
>>         brw_defines.h \
>> +       brw_disk_cache.c \
>>         brw_draw.c \
>>         brw_draw.h \
>>         brw_draw_upload.c \
>> diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c
>> b/src/mesa/drivers/dri/i965/brw_disk_cache.c
>> new file mode 100644
>> index 0000000000..6fe39a7997
>> --- /dev/null
>> +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
>> @@ -0,0 +1,357 @@
>> +/*
>> + * Copyright © 2014 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining
>> a
>> + * copy of this software and associated documentation files (the
>> "Software"),
>> + * to deal in the Software without restriction, including without
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> next
>> + * paragraph) shall be included in all copies or substantial portions of
>> the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
>> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#include "compiler/blob.h"
>> +#include "compiler/glsl/ir_uniform.h"
>> +#include "compiler/glsl/shader_cache.h"
>> +#include "main/mtypes.h"
>> +#include "util/disk_cache.h"
>> +#include "util/macros.h"
>> +#include "util/mesa-sha1.h"
>> +
>> +#include "brw_context.h"
>> +#include "brw_state.h"
>> +#include "brw_vs.h"
>> +#include "brw_wm.h"
>> +
>> +static size_t
>> +key_size(gl_shader_stage stage)
>> +{
>> +   switch (stage) {
>> +   case MESA_SHADER_VERTEX:
>> +      return sizeof(struct brw_vs_prog_key);
>> +   case MESA_SHADER_TESS_CTRL:
>> +      return sizeof(struct brw_tcs_prog_key);
>> +   case MESA_SHADER_TESS_EVAL:
>> +      return sizeof(struct brw_tes_prog_key);
>> +   case MESA_SHADER_GEOMETRY:
>> +      return sizeof(struct brw_gs_prog_key);
>> +   case MESA_SHADER_FRAGMENT:
>> +      return sizeof(struct brw_wm_prog_key);
>> +   case MESA_SHADER_COMPUTE:
>> +      return sizeof(struct brw_cs_prog_key);
>> +   default:
>> +      unreachable("Unsupported stage!");
>> +   }
>> +}
>>
>
> It might be worth putting this in brw_program.h and maybe adding one for
> prog_data as well.
>
>
>> +
>> +static void
>> +gen_shader_sha1(struct brw_context *brw, struct gl_program *prog,
>> +                gl_shader_stage stage, void *key, unsigned char
>> *out_sha1)
>> +{
>> +   char sha1_buf[41];
>> +   unsigned char sha1[20];
>> +   char manifest[256];
>> +   int offset = 0;
>> +
>> +   _mesa_sha1_format(sha1_buf, prog->sh.data->sha1);
>> +   offset += snprintf(manifest, sizeof(manifest), "program: %s\n",
>> sha1_buf);
>> +
>> +   _mesa_sha1_compute(key, key_size(stage), sha1);
>> +   _mesa_sha1_format(sha1_buf, sha1);
>> +   offset += snprintf(manifest + offset, sizeof(manifest) - offset,
>> +                      "%s_key: %s\n", _mesa_shader_stage_to_abbrev(s
>> tage),
>> +                      sha1_buf);
>> +
>> +   _mesa_sha1_compute(manifest, strlen(manifest), out_sha1);
>> +}
>> +
>> +static void
>> +read_program_data(struct gl_program *glprog, struct blob_reader *binary,
>> +                  struct brw_stage_prog_data *prog_data,
>> +                  struct brw_stage_state *stage_state, struct gl_context
>> *ctx)
>>
>
> With the simplifications below, this becomes a very short function.  Can
> we either roll it into read_and_upload or pull the rest of the prog_data
> read code into this function?  I'd really like it if read_program_data and
> write_program_data were almost identical.
>
>
>> +{
>> +   uint32_t nr_params = blob_read_uint32(binary);
>> +   assert(nr_params == prog_data->nr_params);
>>
>
> If we trust prog_data (which we had better be able to do!) this is
> unnecessary.  Same with nr_pull_params.
>
>
>> +   assert(!binary->overrun);
>> +
>> +   prog_data->param = rzalloc_array(NULL, uint32_t, nr_params);
>> +
>> +   uint32_t nr_image_params = blob_read_uint32(binary);
>> +   assert(nr_image_params == glprog->info.num_images);
>>
>
> I don't think we need this anymore.  Let's just drop nr_image_params from
> read/write_program_data
>
>
>> +   assert(!binary->overrun);
>> +
>> +   for (unsigned i = 0; i < nr_params; i++)
>> +      prog_data->param[i] = blob_read_uint32(binary);
>> +
>> +   uint32_t nr_pull_params = blob_read_uint32(binary);
>> +   assert(nr_pull_params == prog_data->nr_pull_params);
>> +
>> +   prog_data->pull_param = rzalloc_array(NULL, uint32_t, nr_pull_params);
>> +
>> +   for (unsigned i = 0; i < nr_pull_params; i++)
>> +      prog_data->pull_param[i] = blob_read_uint32(binary);
>> +
>> +   assert(!binary->overrun);
>> +}
>> +
>> +#define SET_UPLOAD_PRAMS(sh, sh_caps, prog)                          \
>> +      assert(prog_data_size == sizeof(struct brw_##sh##_prog_data)); \
>> +      sh##_key.program_string_id = prog->id;                         \
>> +      cache_id = BRW_CACHE_##sh_caps##_PROG;                         \
>> +      key = &sh##_key;                                               \
>> +      max_threads = devinfo->max_##sh##_threads;                     \
>> +      stage_state = &brw->sh.base;                                   \
>> +
>> +static bool
>> +read_and_upload(struct brw_context *brw, struct disk_cache *cache,
>> +                struct blob_reader *binary, struct gl_program *prog,
>> +                gl_shader_stage stage)
>> +{
>> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>> +
>> +   unsigned char binary_sha1[20];
>> +
>> +   struct brw_wm_prog_key wm_key;
>> +   struct brw_vs_prog_key vs_key;
>> +
>> +   switch (stage) {
>> +   case MESA_SHADER_VERTEX:
>> +      brw_vs_populate_key(brw, &vs_key);
>> +      /* We don't care what instance of the program it is we only care if
>> +       * its the correct binary to load so ignore program id for on disk
>> cache.
>> +       */
>> +      vs_key.program_string_id = 0;
>> +      gen_shader_sha1(brw, prog, stage, &vs_key, binary_sha1);
>> +      break;
>> +   case MESA_SHADER_FRAGMENT:
>> +      brw_wm_populate_key(brw, &wm_key);
>> +      wm_key.program_string_id = 0;
>> +      gen_shader_sha1(brw, prog, stage, &wm_key, binary_sha1);
>> +      break;
>> +   default:
>> +      unreachable("Unsupported stage!");
>> +   }
>> +
>> +   size_t size;
>> +   uint8_t *buffer = disk_cache_get(cache, binary_sha1, &size);
>> +   if (buffer == NULL) {
>> +      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
>> +         char sha1_buf[41];
>> +         _mesa_sha1_format(sha1_buf, binary_sha1);
>> +         fprintf(stderr, "No cached %s binary found for: %s\n",
>> +                 _mesa_shader_stage_to_abbrev(stage), sha1_buf);
>> +      }
>> +      return false;
>> +   }
>> +
>> +   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
>> +      char sha1_buf[41];
>> +      _mesa_sha1_format(sha1_buf, binary_sha1);
>> +      fprintf(stderr, "attempting to populate bo cache with binary:
>> %s\n",
>> +              sha1_buf);
>> +   }
>> +
>> +   blob_reader_init(binary, buffer, size);
>> +
>> +   /* Read shader program from blob. */
>> +   size_t program_size = blob_read_uint32(binary);
>> +   const uint8_t *program = blob_read_bytes(binary, program_size);
>> +
>> +   /* Read shader program_data from blob. */
>> +   size_t prog_data_size = blob_read_uint32(binary);
>>
>
> assert(prog_data_size == brw_prog_data_size(stage));
>
>
>> +   assert(!binary->overrun);
>> +   const void *blob_prog_data = blob_read_bytes(binary, prog_data_size);
>> +   // TODO: fix mem_ctx
>> +   struct brw_stage_prog_data *prog_data =
>> +      ralloc_size(NULL, prog_data_size);
>> +   memcpy(prog_data, blob_prog_data, prog_data_size);
>>
>
> You can just use blob_copy_bytes instead of this.
>
>
>> +
>> +   /* Upload params set by SET_UPLOAD_PRAMS() */
>> +   struct brw_stage_state *stage_state;
>> +   enum brw_cache_id cache_id;
>> +   unsigned max_threads;
>> +   void *key;
>> +
>> +   switch (stage) {
>> +   case MESA_SHADER_VERTEX: {
>> +      struct brw_program *vp = (struct brw_program *) prog;
>> +      SET_UPLOAD_PRAMS(vs, VS, vp)
>> +      break;
>> +   }
>> +   case MESA_SHADER_FRAGMENT: {
>> +      struct brw_program *wp = (struct brw_program *) prog;
>> +      SET_UPLOAD_PRAMS(wm, FS, wp)
>> +      break;
>> +   }
>> +   default:
>> +      unreachable("Unsupported stage!");
>> +   }
>> +
>> +   read_program_data(prog, binary, prog_data, stage_state, &brw->ctx);
>> +
>> +   if (binary->current != binary->end || binary->overrun) {
>> +      /* Something very bad has gone wrong discard the item from the
>> cache and
>> +       * rebuild from source.
>> +       */
>> +      assert(!"Invalid i965 shader disk cache item!");
>> +
>> +      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
>> +         fprintf(stderr, "Error reading program from cache (invalid i965
>> "
>> +                 "cache item)\n");
>> +      }
>> +
>> +      disk_cache_remove(cache, binary_sha1);
>> +      free(buffer);
>> +      return false;
>> +   }
>> +
>> +   brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch,
>> +                           max_threads);
>> +
>> +   brw_upload_cache(&brw->cache, cache_id, key, key_size(stage), program,
>> +                    program_size, prog_data, prog_data_size,
>> +                    &stage_state->prog_offset, &stage_state->prog_data);
>> +
>> +   prog->program_written_to_cache = true;
>> +
>> +   free(buffer);
>> +
>> +   return true;
>> +}
>> +
>> +bool
>> +brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage
>> stage)
>> +{
>> +   struct blob_reader binary;
>> +
>> +   struct disk_cache *cache = brw->ctx.Cache;
>> +   if (cache == NULL)
>> +      return false;
>> +
>> +   struct gl_program *prog = brw->ctx._Shader->CurrentProgram[stage];
>> +   if (prog == NULL)
>> +      return false;
>> +
>> +   if (prog->sh.data->LinkStatus != linking_skipped)
>> +      goto FAIL;
>> +
>> +   if (!read_and_upload(brw, cache, &binary, prog, stage))
>> +      goto FAIL;
>> +
>> +   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
>> +      fprintf(stderr, "read gen program from cache\n");
>> +   }
>> +
>> +   return true;
>> +
>> +FAIL:
>> +   /*FIXME: Fall back and compile from source here. */
>> +   return false;
>> +}
>> +
>> +static void
>> +write_program_data(struct brw_context *brw, struct gl_program *prog,
>> +                   void *key, struct brw_stage_prog_data *prog_data,
>> +                   size_t program_size, size_t prog_data_size,
>> +                   uint32_t prog_offset, struct disk_cache *cache,
>> +                   gl_shader_stage stage)
>> +{
>> +   unsigned char sha1[20];
>> +   char buf[41];
>> +
>> +   struct blob binary;
>> +   blob_init(&binary);
>> +
>> +   gen_shader_sha1(brw, prog, stage, key, sha1);
>> +
>> +   /* Write program to blob. */
>> +   blob_write_uint32(&binary, program_size);
>> +
>> +   size_t blob_offset = blob_reserve_bytes(&binary, program_size);
>> +   uint8_t *blob_cursor = binary.data + blob_offset;
>> +
>> +   /* Copy program binary */
>> +   if (brw->screen->devinfo.has_llc) {
>> +      memcpy(blob_cursor, brw->cache.map + prog_offset, program_size);
>>
>
> blob_overwrite_bytes
>
>
>> +   } else {
>> +      void *map = brw_bo_map(brw, brw->cache.bo, MAP_READ);
>> +      if (unlikely(!map)) {
>> +         _mesa_error_no_memory(__func__);
>> +         return;
>> +      }
>> +      memcpy(blob_cursor, map + prog_offset, program_size);
>>
>
> blob_overwrite_bytes
>
>
>> +      brw_bo_unmap(brw->cache.bo);
>> +   }
>> +
>> +   /* Write program_data to blob. */
>> +   blob_write_uint32(&binary, prog_data_size);
>> +   blob_write_bytes(&binary, prog_data, prog_data_size);
>> +
>> +   blob_write_uint32(&binary, prog_data->nr_params);
>> +   blob_write_uint32(&binary, prog->info.num_images);
>> +
>> +   for (unsigned i = 0; i < prog_data->nr_params; i++) {
>> +      blob_write_uint32(&binary, prog_data->param[i]);
>>
>
> If we're concerned about the efficiency of this code,
> blob_read/write_bytes might be better here.  I don't know that we care that
> much though.
>
>
>> +   }
>> +
>> +   blob_write_uint32(&binary, prog_data->nr_pull_params);
>> +   for (unsigned i = 0; i < prog_data->nr_pull_params; i++) {
>> +      blob_write_uint32(&binary, prog_data->pull_param[i]);
>> +   }
>> +
>> +   _mesa_sha1_format(buf, sha1);
>> +   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
>> +      fprintf(stderr, "putting binary in cache: %s\n", buf);
>> +   }
>> +
>> +   disk_cache_put(cache, sha1, binary.data, binary.size, NULL);
>> +
>> +   prog->program_written_to_cache = true;
>> +   blob_finish(&binary);
>> +}
>> +
>> +void
>> +brw_disk_cache_write_program(struct brw_context *brw)
>> +{
>> +   struct disk_cache *cache = brw->ctx.Cache;
>> +   if (cache == NULL)
>> +      return;
>> +
>> +   struct gl_program *prog =
>> +      brw->ctx._Shader->CurrentProgram[MESA_SHADER_VERTEX];
>> +   if (prog && !prog->program_written_to_cache) {
>> +      struct brw_vs_prog_key vs_key;
>> +      brw_vs_populate_key(brw, &vs_key);
>> +      vs_key.program_string_id = 0;
>> +
>> +      write_program_data(brw, prog, &vs_key, brw->vs.base.prog_data,
>> +                         brw->vs.base.prog_data->program_size,
>>
>
We don't need to pass this in explicitly

+                         sizeof(struct brw_vs_prog_data),
>> +                         brw->vs.base.prog_offset, cache,
>> +                         MESA_SHADER_VERTEX);
>> +   }
>> +
>> +   prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
>> +   if (prog && !prog->program_written_to_cache) {
>> +      struct brw_wm_prog_key wm_key;
>> +      brw_wm_populate_key(brw, &wm_key);
>> +      wm_key.program_string_id = 0;
>> +
>> +      write_program_data(brw, prog, &wm_key, brw->wm.base.prog_data,
>> +                         brw->wm.base.prog_data->program_size,
>>
>
Or here


> +                         sizeof(struct brw_wm_prog_data),
>> +                         brw->wm.base.prog_offset, cache,
>> +                         MESA_SHADER_FRAGMENT);
>> +   }
>> +}
>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h
>> b/src/mesa/drivers/dri/i965/brw_state.h
>> index 8db354cf23..6f2e0501b4 100644
>> --- a/src/mesa/drivers/dri/i965/brw_state.h
>> +++ b/src/mesa/drivers/dri/i965/brw_state.h
>> @@ -131,6 +131,11 @@ void brw_upload_state_base_address(struct
>> brw_context *brw);
>>  void gen8_write_pma_stall_bits(struct brw_context *brw,
>>                                 uint32_t pma_stall_bits);
>>
>> +/* brw_disk_cache.c */
>> +bool brw_disk_cache_upload_program(struct brw_context *brw,
>> +                                   gl_shader_stage stage);
>> +void brw_disk_cache_write_program(struct brw_context *brw);
>> +
>>  /***********************************************************************
>>   * brw_state.c
>>   */
>> diff --git a/src/mesa/drivers/dri/i965/meson.build
>> b/src/mesa/drivers/dri/i965/meson.build
>> index 144a254bd6..09e1179adc 100644
>> --- a/src/mesa/drivers/dri/i965/meson.build
>> +++ b/src/mesa/drivers/dri/i965/meson.build
>> @@ -34,6 +34,7 @@ files_i965 = files(
>>    'brw_cs.h',
>>    'brw_curbe.c',
>>    'brw_defines.h',
>> +  'brw_disk_cache.c',
>>    'brw_draw.c',
>>    'brw_draw.h',
>>    'brw_draw_upload.c',
>> --
>> 2.15.0.rc0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20171020/b9ba0843/attachment-0001.html>


More information about the mesa-dev mailing list