[Mesa-dev] [PATCH 06/27] i965: add initial implementation of on disk shader cache

Jordan Justen jordan.l.justen at intel.com
Sat Aug 19 07:44:22 UTC 2017


From: Timothy Arceri <timothy.arceri at collabora.com>

This uses the recently-added disk_cache.c to write out the final
linked binary for vertex and fragment shader programs.

This is based off the initial implementation done by Carl Worth.

[jordan.l.justen at intel.com: *_cached_program => brw_disk_cache_*_program]
[jordan.l.justen at intel.com: brw_shader_cache.c => brw_disk_cache.c]
[jordan.l.justen at intel.com: don't map to write program when LLC is present]
[jordan.l.justen at intel.com: set program_written_to_cache on read from cache]
[jordan.l.justen at intel.com: only try cache when status is linking_skipped]
Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
---
 src/mesa/drivers/dri/i965/Makefile.sources |   1 +
 src/mesa/drivers/dri/i965/brw_disk_cache.c | 395 +++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_state.h      |   5 +
 3 files changed, 401 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_disk_cache.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 425c883de8..6e21010bae 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -14,6 +14,7 @@ i965_FILES = \
 	brw_cs.h \
 	brw_curbe.c \
 	brw_defines.h \
+	brw_disk_cache.c \
 	brw_draw.c \
 	brw_draw.h \
 	brw_draw_upload.c \
diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c
new file mode 100644
index 0000000000..b56e561e14
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
@@ -0,0 +1,395 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/glsl/blob.h"
+#include "compiler/glsl/ir_uniform.h"
+#include "compiler/glsl/shader_cache.h"
+#include "main/mtypes.h"
+#include "util/disk_cache.h"
+#include "util/macros.h"
+#include "util/mesa-sha1.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_vs.h"
+#include "brw_wm.h"
+
+/* Widen a pointer to a 64-bit integer for storage in a cache blob.
+ * Converting through uintptr_t zero-extends on every 32-bit target, so no
+ * architecture-specific masking of the upper bits is needed.
+ */
+static uint64_t
+ptr_to_uint64_t(void *ptr)
+{
+   return (uint64_t) (uintptr_t) ptr;
+}
+
+/* Return the size in bytes of the program key struct that belongs to the
+ * given shader stage.
+ */
+static size_t
+key_size(gl_shader_stage stage)
+{
+   size_t bytes;
+
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      bytes = sizeof(struct brw_vs_prog_key);
+      break;
+   case MESA_SHADER_TESS_CTRL:
+      bytes = sizeof(struct brw_tcs_prog_key);
+      break;
+   case MESA_SHADER_TESS_EVAL:
+      bytes = sizeof(struct brw_tes_prog_key);
+      break;
+   case MESA_SHADER_GEOMETRY:
+      bytes = sizeof(struct brw_gs_prog_key);
+      break;
+   case MESA_SHADER_FRAGMENT:
+      bytes = sizeof(struct brw_wm_prog_key);
+      break;
+   case MESA_SHADER_COMPUTE:
+      bytes = sizeof(struct brw_cs_prog_key);
+      break;
+   default:
+      unreachable("Unsupported stage!");
+   }
+
+   return bytes;
+}
+
+/* Compute the disk cache key for one stage of a linked program.
+ *
+ * A two-line text manifest is built from the formatted SHA-1 stored on the
+ * linked program and the formatted SHA-1 of the stage's key bytes
+ * (key_size(stage) bytes starting at key). out_sha1 receives the SHA-1 of
+ * that manifest text.
+ */
+static void
+gen_shader_sha1(struct brw_context *brw, struct gl_program *prog,
+                gl_shader_stage stage, void *key, unsigned char *out_sha1)
+{
+   char manifest[256];
+   char sha1_text[41];
+   unsigned char key_sha1[20];
+   int len;
+
+   /* First manifest line: hash of the linked program. */
+   _mesa_sha1_format(sha1_text, prog->sh.data->sha1);
+   len = snprintf(manifest, sizeof(manifest), "program: %s\n", sha1_text);
+
+   /* Second manifest line: hash of the stage-specific key. */
+   _mesa_sha1_compute(key, key_size(stage), key_sha1);
+   _mesa_sha1_format(sha1_text, key_sha1);
+   len += snprintf(manifest + len, sizeof(manifest) - len,
+                   "%s_key: %s\n", _mesa_shader_stage_to_abbrev(stage),
+                   sha1_text);
+
+   _mesa_sha1_compute(manifest, strlen(manifest), out_sha1);
+}
+
+/* Rebuild the parameter pointer arrays of prog_data from the tail of a
+ * cached blob.
+ *
+ * The values are read from binary in this order: the writer-side address of
+ * the ParameterValues storage, the writer-side address of the
+ * UniformDataSlots storage, the param count followed by one 64-bit
+ * writer-side pointer value per param, and the pull_param count. Each saved
+ * pointer is turned into an element offset from one of the two bases and
+ * re-pointed into the matching storage of glprog; a pointer that falls in
+ * neither range is pointed at a static zero value.
+ *
+ * ctx is only consulted for the GLSL_CACHE_INFO debug flag; stage is
+ * currently unused.
+ */
+static void
+load_program_data(struct gl_program *glprog, struct blob_reader *binary,
+                  struct brw_stage_prog_data *prog_data,
+                  gl_shader_stage stage, struct gl_context *ctx)
+{
+   static const gl_constant_value zero = { 0 };
+
+   uint64_t parameter_values_base = blob_read_uint64(binary);
+   uint64_t uniform_data_slots_base = blob_read_uint64(binary);
+
+   /* The count stored next to the pointers must agree with the count that
+    * is already in prog_data.
+    */
+   uint32_t nr_params = blob_read_uint32(binary);
+   assert(nr_params == prog_data->nr_params);
+
+   prog_data->param = rzalloc_array(NULL, const gl_constant_value *,
+                                    nr_params);
+   if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
+      fprintf(stderr, "Allocating %d prog_data->params (%p)\n",
+              prog_data->nr_params, prog_data->param);
+   }
+
+   for (unsigned i = 0; i < nr_params; i++) {
+      uint64_t param = blob_read_uint64(binary);
+      ptrdiff_t p_offset, u_offset;
+      struct gl_program_parameter_list *param_list = glprog->Parameters;
+
+      /* Offsets are measured in gl_constant_value elements. */
+      p_offset = (param - parameter_values_base) / sizeof(gl_constant_value);
+      u_offset = (param - uniform_data_slots_base) / sizeof(gl_constant_value);
+
+      /* ParameterValues is tried first, then UniformDataSlots.
+       * NOTE(review): the factor 4 presumably reflects four values per
+       * parameter -- confirm against gl_program_parameter_list.
+       */
+      if (p_offset >= 0 && p_offset < 4 * param_list->NumParameters) {
+         prog_data->param[i] =
+            ((gl_constant_value *) param_list->ParameterValues) + p_offset;
+      } else if (u_offset >= 0 &&
+                 u_offset < glprog->sh.data->NumUniformDataSlots) {
+         prog_data->param[i] = glprog->sh.data->UniformDataSlots + u_offset;
+      } else {
+         prog_data->param[i] = &zero;
+      }
+   }
+
+   uint32_t nr_pull_params = blob_read_uint32(binary);
+   assert(nr_pull_params == prog_data->nr_pull_params);
+
+   /* The pull_param array is allocated here, but none of its entries are
+    * filled in yet (see the FIXME below).
+    */
+   prog_data->pull_param = rzalloc_array(NULL, const gl_constant_value *,
+                                         nr_pull_params);
+
+   for (unsigned i = 0; i < nr_pull_params; i++) {
+      /* FIXME: We need to fixup pull_params pointers here. */
+   }
+}
+
+/* Fill in the per-stage locals of read_and_upload() ahead of the upload.
+ *
+ * sh is the lower-case stage infix (vs, wm), sh_caps the infix of the
+ * BRW_CACHE_*_PROG id (VS, FS), and prog the struct brw_program whose id is
+ * stored in the key. The expansion is a plain statement sequence and relies
+ * on these names being in scope at the use site: prog_data_size, <sh>_key,
+ * cache_id, key, max_threads, devinfo, stage_state and brw.
+ */
+#define SET_UPLOAD_PRAMS(sh, sh_caps, prog)                          \
+      assert(prog_data_size == sizeof(struct brw_##sh##_prog_data)); \
+      sh##_key.program_string_id = prog->id;                         \
+      cache_id = BRW_CACHE_##sh_caps##_PROG;                         \
+      key = &sh##_key;                                               \
+      max_threads = devinfo->max_##sh##_threads;                     \
+      stage_state = &brw->sh.base;                                   \
+
+/* Look up the cached binary for one stage of prog and upload it.
+ *
+ * The lookup key comes from gen_shader_sha1(), fed with prog and the freshly
+ * populated stage key (program_string_id zeroed). On a hit the entry is
+ * parsed through binary, the parameter pointers are rebuilt by
+ * load_program_data(), and the program plus its prog_data are passed to
+ * brw_upload_cache() for the stage's brw_stage_state.
+ *
+ * Only MESA_SHADER_VERTEX and MESA_SHADER_FRAGMENT are handled.
+ *
+ * Returns true when an entry was found, consumed exactly and uploaded.
+ * Returns false when there is no entry, or when the entry was not consumed
+ * exactly; in the latter case the entry is also removed from the cache.
+ */
+static bool
+read_and_upload(struct brw_context *brw, struct disk_cache *cache,
+                struct blob_reader *binary, struct gl_program *prog,
+                gl_shader_stage stage)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   unsigned char binary_sha1[20];
+
+   struct brw_wm_prog_key wm_key;
+   struct brw_vs_prog_key vs_key;
+
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      brw_vs_populate_key(brw, &vs_key);
+      /* We don't care which instance of the program this is, only whether
+       * it's the correct binary to load, so ignore the program id for the
+       * on-disk cache.
+       */
+      vs_key.program_string_id = 0;
+      gen_shader_sha1(brw, prog, stage, &vs_key, binary_sha1);
+      break;
+   case MESA_SHADER_FRAGMENT:
+      brw_wm_populate_key(brw, &wm_key);
+      wm_key.program_string_id = 0;
+      gen_shader_sha1(brw, prog, stage, &wm_key, binary_sha1);
+      break;
+   default:
+      unreachable("Unsupported stage!");
+   }
+
+   /* From here on buffer is released with free() on every return path that
+    * follows a successful lookup.
+    */
+   size_t size;
+   uint8_t *buffer = disk_cache_get(cache, binary_sha1, &size);
+   if (buffer == NULL) {
+      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+         char sha1_buf[41];
+         _mesa_sha1_format(sha1_buf, binary_sha1);
+         fprintf(stderr, "No cached %s binary found for: %s\n",
+                 _mesa_shader_stage_to_abbrev(stage), sha1_buf);
+      }
+      return false;
+   }
+
+   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+      char sha1_buf[41];
+      _mesa_sha1_format(sha1_buf, binary_sha1);
+      fprintf(stderr, "attempting to populate bo cache with binary: %s\n",
+              sha1_buf);
+   }
+
+   blob_reader_init(binary, buffer, size);
+
+   /* Read shader program from blob.
+    * NOTE(review): program and prog_data are used below while buffer is
+    * still allocated; presumably they point into buffer rather than being
+    * copies -- confirm against blob_read_bytes().
+    */
+   size_t program_size = blob_read_uint32(binary);
+   uint8_t *program = blob_read_bytes(binary, program_size);
+
+   /* Read shader program_data from blob. */
+   size_t prog_data_size = blob_read_uint32(binary);
+   struct brw_stage_prog_data *prog_data =
+      blob_read_bytes(binary, prog_data_size);
+
+   /* Upload params set by SET_UPLOAD_PRAMS() */
+   struct brw_stage_state *stage_state;
+   enum brw_cache_id cache_id;
+   unsigned max_threads;
+   void *key;
+
+   /* The macro also restores the real program id in the key, which was
+    * zeroed above for the disk cache lookup.
+    */
+   switch (stage) {
+   case MESA_SHADER_VERTEX: {
+      struct brw_program *vp = (struct brw_program *) prog;
+      SET_UPLOAD_PRAMS(vs, VS, vp)
+      break;
+   }
+   case MESA_SHADER_FRAGMENT: {
+      struct brw_program *wp = (struct brw_program *) prog;
+      SET_UPLOAD_PRAMS(wm, FS, wp)
+      break;
+   }
+   default:
+      unreachable("Unsupported stage!");
+   }
+
+   /* NOTE(review): prog_data is dereferenced inside load_program_data()
+    * before the overrun check below runs; confirm that blob_read_bytes()
+    * cannot return an unusable pointer for a truncated entry.
+    */
+   load_program_data(prog, binary, prog_data, stage, &brw->ctx);
+
+   /* The entry is only accepted if the reader consumed it exactly. */
+   if (binary->current != binary->end || binary->overrun) {
+      /* Something has gone very wrong: discard the item from the cache and
+       * rebuild from source. With assertions enabled the assert below fires
+       * before the cleanup is reached.
+       */
+      assert(!"Invalid i965 shader disk cache item!");
+
+      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+         fprintf(stderr, "Error reading program from cache (invalid i965 "
+                 "cache item)\n");
+      }
+
+      disk_cache_remove(cache, binary_sha1);
+      free(buffer);
+      return false;
+   }
+
+   brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch,
+                           max_threads);
+
+   brw_upload_cache(&brw->cache, cache_id, key, key_size(stage), program,
+                    program_size, prog_data, prog_data_size,
+                    &stage_state->prog_offset, &stage_state->prog_data);
+
+   /* The binary came from the disk cache, so brw_disk_cache_write_program()
+    * will skip this program.
+    */
+   prog->program_written_to_cache = true;
+
+   free(buffer);
+
+   return true;
+}
+
+/* Try to satisfy the current program of the given stage from the on-disk
+ * shader cache.
+ *
+ * Returns false when the context has no disk cache, when no program is
+ * current for the stage, when the program's link status is anything other
+ * than linking_skipped, or when read_and_upload() fails. Returns true once
+ * the cached binary has been uploaded.
+ */
+bool
+brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage)
+{
+   struct disk_cache *cache = brw->ctx.Cache;
+   if (!cache)
+      return false;
+
+   struct gl_program *prog = brw->ctx._Shader->CurrentProgram[stage];
+   if (!prog)
+      return false;
+
+   /* The cache is only consulted for programs whose linking was skipped. */
+   struct blob_reader reader;
+   const bool uploaded =
+      prog->sh.data->LinkStatus == linking_skipped &&
+      read_and_upload(brw, cache, &reader, prog, stage);
+
+   if (!uploaded) {
+      /*FIXME: Fall back and compile from source here. */
+      return false;
+   }
+
+   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+      fprintf(stderr, "%s: Successfully read every byte written!\n",
+              __FUNCTION__);
+   }
+
+   return true;
+}
+
+/* Serialize one compiled stage and store it in the on-disk shader cache.
+ *
+ * Blob layout, in order: program size and program bytes, prog_data size and
+ * the prog_data bytes, the addresses of the ParameterValues and
+ * UniformDataSlots storage, the param count with one 64-bit pointer value
+ * per param, and the pull_param count. The entry is stored under the SHA-1
+ * produced by gen_shader_sha1() for prog, stage and key.
+ *
+ * program_size and prog_offset give the size and location of the program
+ * inside brw->cache; prog_data_size is the number of bytes of prog_data to
+ * store. On success prog->program_written_to_cache is set. If the blob
+ * cannot be created or the cache BO cannot be mapped, nothing is stored and
+ * the flag is left untouched.
+ */
+static void
+write_program_data(struct brw_context *brw, struct gl_program *prog,
+                   void *key, struct brw_stage_prog_data *prog_data,
+                   size_t program_size, size_t prog_data_size,
+                   uint32_t prog_offset, struct disk_cache *cache,
+                   gl_shader_stage stage)
+{
+   unsigned char sha1[20];
+   char buf[41];
+
+   struct blob *binary = blob_create();
+   if (!binary)
+      return;
+
+   gen_shader_sha1(brw, prog, stage, key, sha1);
+
+   /* Write program to blob. */
+   blob_write_uint32(binary, program_size);
+
+   uint8_t *blob_cursor = blob_reserve_bytes(binary, program_size);
+
+   /* Copy program binary. With LLC the program is read through
+    * brw->cache.map; otherwise the cache BO is mapped for reading just for
+    * the duration of the copy.
+    */
+   if (brw->has_llc) {
+      memcpy(blob_cursor, brw->cache.map + prog_offset, program_size);
+   } else {
+      void *map = brw_bo_map(brw, brw->cache.bo, MAP_READ);
+      if (unlikely(!map)) {
+         _mesa_error_no_memory(__func__);
+         /* Leave through the common exit so the blob is not leaked. */
+         goto out;
+      }
+      memcpy(blob_cursor, map + prog_offset, program_size);
+      brw_bo_unmap(brw->cache.bo);
+   }
+
+   /* Write program_data to blob. */
+   blob_write_uint32(binary, prog_data_size);
+   blob_write_bytes(binary, prog_data, prog_data_size);
+
+   /* Include variable-length params from end of brw_stage_prog_data as well.
+    *
+    * Before writing either of the params or pull_params arrays, we first
+    * write out the addresses of the ParameterValues and UniformDataSlots
+    * storage. The pointers within params will be pointers to within one of
+    * these blocks of storage. So we can use the addresses of this storage
+    * together with the pointer values to correctly construct pointers to the
+    * actual storage when the program data is loaded from the cache.
+    */
+   blob_write_uint64(binary,
+                     ptr_to_uint64_t(prog->Parameters->ParameterValues));
+
+   blob_write_uint64(binary, ptr_to_uint64_t(prog->sh.data->UniformDataSlots));
+
+   blob_write_uint32(binary, prog_data->nr_params);
+
+   for (unsigned i = 0; i < prog_data->nr_params; i++) {
+      blob_write_uint64(binary, ptr_to_uint64_t((void *) prog_data->param[i]));
+   }
+
+   /* Only the pull_param count is stored; the entries are not written. */
+   blob_write_uint32(binary, prog_data->nr_pull_params);
+
+   _mesa_sha1_format(buf, sha1);
+   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+      fprintf(stderr, "putting binary in cache: %s\n", buf);
+   }
+
+   disk_cache_put(cache, sha1, binary->data, binary->size);
+
+   prog->program_written_to_cache = true;
+
+out:
+   /* NOTE(review): only the blob struct itself is released here; confirm
+    * whether binary->data needs a separate release under the blob API.
+    */
+   free(binary);
+}
+
+/* Write the current vertex and fragment programs to the on-disk shader
+ * cache.
+ *
+ * Does nothing when the context has no disk cache. A stage is skipped when
+ * it has no current program or when that program is already flagged
+ * program_written_to_cache. Each stage key is filled in by the matching
+ * brw_*_populate_key() and has program_string_id cleared before it is passed
+ * to write_program_data().
+ */
+void
+brw_disk_cache_write_program(struct brw_context *brw)
+{
+   struct disk_cache *cache = brw->ctx.Cache;
+   if (!cache)
+      return;
+
+   struct gl_program *vp =
+      brw->ctx._Shader->CurrentProgram[MESA_SHADER_VERTEX];
+   if (vp != NULL && !vp->program_written_to_cache) {
+      struct brw_stage_state *vs = &brw->vs.base;
+      struct brw_vs_prog_key key;
+
+      brw_vs_populate_key(brw, &key);
+      key.program_string_id = 0;
+
+      write_program_data(brw, vp, &key, vs->prog_data,
+                         vs->prog_data->program_size,
+                         sizeof(struct brw_vs_prog_data),
+                         vs->prog_offset, cache, MESA_SHADER_VERTEX);
+   }
+
+   struct gl_program *fp =
+      brw->ctx._Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
+   if (fp != NULL && !fp->program_written_to_cache) {
+      struct brw_stage_state *wm = &brw->wm.base;
+      struct brw_wm_prog_key key;
+
+      brw_wm_populate_key(brw, &key);
+      key.program_string_id = 0;
+
+      write_program_data(brw, fp, &key, wm->prog_data,
+                         wm->prog_data->program_size,
+                         sizeof(struct brw_wm_prog_data),
+                         wm->prog_offset, cache, MESA_SHADER_FRAGMENT);
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 46665aae12..8b468c752c 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -131,6 +131,11 @@ void brw_upload_state_base_address(struct brw_context *brw);
 void gen8_write_pma_stall_bits(struct brw_context *brw,
                                uint32_t pma_stall_bits);
 
+/* brw_disk_cache.c */
+
+/* Try to load the current program of the given stage from the on-disk
+ * shader cache and upload it. Returns true only when a cached binary was
+ * uploaded.
+ */
+bool brw_disk_cache_upload_program(struct brw_context *brw,
+                                   gl_shader_stage stage);
+
+/* Store the current vertex and fragment programs in the on-disk shader
+ * cache, skipping any program already marked as written.
+ */
+void brw_disk_cache_write_program(struct brw_context *brw);
+
 /***********************************************************************
  * brw_state.c
  */
-- 
2.14.0



More information about the mesa-dev mailing list